]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: remove faulty restriction
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
/*
 * lxc: linux Container library
 *
 * (C) Copyright IBM Corp. 2007, 2008
 *
 * Authors:
 * Daniel Lezcano <daniel.lezcano at free.fr>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
cb0dc11b 23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
27#include <arpa/inet.h>
cb0dc11b
CB
28#include <ctype.h>
29#include <errno.h>
30#include <fcntl.h>
0ad19a3f 31#include <linux/netlink.h>
32#include <linux/rtnetlink.h>
33#include <linux/sockios.h>
cb0dc11b
CB
34#include <net/ethernet.h>
35#include <net/if.h>
36#include <net/if_arp.h>
37#include <netinet/in.h>
d38dd64a
CB
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
cb0dc11b
CB
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
d38dd64a
CB
47#include <time.h>
48#include <unistd.h>
f549edcc 49
d38dd64a 50#include "../include/netns_ifaddrs.h"
7ab1ba02 51#include "af_unix.h"
72d0e1cb 52#include "conf.h"
811ef482 53#include "config.h"
e3233f26 54#include "file_utils.h"
cb0dc11b 55#include "log.h"
8335fd40 56#include "macro.h"
95ea3d1f 57#include "memory_utils.h"
cb0dc11b
CB
58#include "network.h"
59#include "nl.h"
d7b58715 60#include "raw_syscalls.h"
59524108 61#include "syscall_wrappers.h"
0d204771 62#include "utils.h"
0ad19a3f 63
9de31d5a
CB
64#ifndef HAVE_STRLCPY
65#include "include/strlcpy.h"
66#endif
67
/* Declares the "network" log category under "lxc" (see log.h for the macro). */
lxc_log_define(network, lxc);

/*
 * Signature shared by the per-type network device callbacks stored in the
 * netdev_conf[] (setup) and netdev_deconf[] (teardown) tables below.
 */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/* NOTE(review): presumably the loopback interface name; not used in this part
 * of the file.
 */
static const char loDev[] = "lo";
811ef482 72
b670016a 73static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 74{
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121}
122
/* Add an IPv4 route to @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
127
/* Add an IPv6 route to @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
132
/* Remove the IPv4 route to @dest/@netmask from the device with index @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
137
/* Remove the IPv6 route to @dest/@netmask from the device with index @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
142
d4a7da46 143static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144{
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160}
161
162static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163{
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179}
180
811ef482
CB
181static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182{
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
de4855a8 188 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
d34212ad
CB
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
811ef482
CB
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
6d1400b5 215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482
CB
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
6d1400b5 225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
227 goto out_delete;
228 }
229
8da62485
CB
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
811ef482
CB
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
de4855a8 253 } else if (netdev->link[0] != '\0') {
811ef482
CB
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
6d1400b5 268
811ef482 269 if (err) {
6d1400b5 270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
811ef482
CB
273 goto out_delete;
274 }
275 }
276
de4855a8 277 if (netdev->link[0] != '\0') {
811ef482
CB
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
6d1400b5 280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
811ef482
CB
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
6d1400b5 290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
292 goto out_delete;
293 }
294
d4a7da46 295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
811ef482 307 if (netdev->upscript) {
14a7b0f9
CB
308 char *argv[] = {
309 "veth",
310 netdev->link,
990b9ac3 311 veth1,
14a7b0f9
CB
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
811ef482
CB
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
811ef482
CB
330 return -1;
331}
332
333static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334{
335 char peerbuf[IFNAMSIZ], *peer;
336 int err;
3bef7b7b 337 unsigned int mtu = 0;
811ef482 338
de4855a8 339 if (netdev->link[0] == '\0') {
811ef482
CB
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
344 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peerbuf))
346 return -1;
347
348 peer = lxc_mkifname(peerbuf);
349 if (!peer)
350 return -1;
351
352 err = lxc_macvlan_create(netdev->link, peer,
353 netdev->priv.macvlan_attr.mode);
354 if (err) {
6d1400b5 355 errno = -err;
356 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
357 peer, netdev->link);
966e9f1f 358 goto on_error;
811ef482
CB
359 }
360
361 netdev->ifindex = if_nametoindex(peer);
362 if (!netdev->ifindex) {
363 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 364 goto on_error;
811ef482
CB
365 }
366
3bef7b7b
TP
367 if (netdev->mtu) {
368 err = lxc_safe_uint(netdev->mtu, &mtu);
369 if (err < 0) {
370 errno = -err;
371 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
372 goto on_error;
373 }
374
375 err = lxc_netdev_set_mtu(peer, mtu);
376 if (err < 0) {
377 errno = -err;
378 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
379 goto on_error;
380 }
381 }
382
811ef482 383 if (netdev->upscript) {
14a7b0f9
CB
384 char *argv[] = {
385 "macvlan",
386 netdev->link,
387 NULL,
388 };
389
390 err = run_script_argv(handler->name,
391 handler->conf->hooks_version, "net",
392 netdev->upscript, "up", argv);
393 if (err < 0)
966e9f1f 394 goto on_error;
811ef482
CB
395 }
396
397 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
398 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
399
400 return 0;
966e9f1f
CB
401
402on_error:
811ef482 403 lxc_netdev_delete_by_name(peer);
811ef482
CB
404 return -1;
405}
406
c9f52382 407static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
408{
409 int err, index, len;
410 struct ifinfomsg *ifi;
411 struct nl_handler nlh;
412 struct rtattr *nest, *nest2;
413 struct nlmsg *answer = NULL, *nlmsg = NULL;
414
415 len = strlen(master);
416 if (len == 1 || len >= IFNAMSIZ)
417 return minus_one_set_errno(EINVAL);
418
419 len = strlen(name);
420 if (len == 1 || len >= IFNAMSIZ)
421 return minus_one_set_errno(EINVAL);
422
423 index = if_nametoindex(master);
424 if (!index)
425 return minus_one_set_errno(EINVAL);
426
427 err = netlink_open(&nlh, NETLINK_ROUTE);
428 if (err)
429 return minus_one_set_errno(-err);
430
431 err = -ENOMEM;
432 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
433 if (!nlmsg)
434 goto out;
435
436 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
437 if (!answer)
438 goto out;
439
440 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
441 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
442
443 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
444 if (!ifi) {
445 goto out;
446 }
447 ifi->ifi_family = AF_UNSPEC;
448
449 err = -EPROTO;
450 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
451 if (!nest)
452 goto out;
453
454 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
455 goto out;
456
457 if (mode) {
458 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
459 if (!nest2)
460 goto out;
461
462 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
463 goto out;
464
465 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
466 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
467 */
468 if (isolation > 0) {
469 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
470 goto out;
471 }
472
473 nla_end_nested(nlmsg, nest2);
474 }
475
476 nla_end_nested(nlmsg, nest);
477
478 if (nla_put_u32(nlmsg, IFLA_LINK, index))
479 goto out;
480
481 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
482 goto out;
483
484 err = netlink_transaction(&nlh, nlmsg, answer);
485out:
486 netlink_close(&nlh);
487 nlmsg_free(answer);
488 nlmsg_free(nlmsg);
489 if (err < 0)
490 return minus_one_set_errno(-err);
491 return 0;
492}
493
494static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
495{
496 char peerbuf[IFNAMSIZ], *peer;
497 int err;
006e135e 498 unsigned int mtu = 0;
c9f52382 499
500 if (netdev->link[0] == '\0') {
501 ERROR("No link for ipvlan network device specified");
502 return -1;
503 }
504
505 err = snprintf(peerbuf, sizeof(peerbuf), "ipXXXXXX");
506 if (err < 0 || (size_t)err >= sizeof(peerbuf))
507 return -1;
508
509 peer = lxc_mkifname(peerbuf);
510 if (!peer)
511 return -1;
512
513 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode, netdev->priv.ipvlan_attr.isolation);
514 if (err) {
515 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"", peer, netdev->link);
516 goto on_error;
517 }
518
519 netdev->ifindex = if_nametoindex(peer);
520 if (!netdev->ifindex) {
521 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
522 goto on_error;
523 }
524
006e135e 525 if (netdev->mtu) {
526 err = lxc_safe_uint(netdev->mtu, &mtu);
527 if (err < 0) {
528 errno = -err;
529 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
530 goto on_error;
531 }
532
533 err = lxc_netdev_set_mtu(peer, mtu);
534 if (err < 0) {
535 errno = -err;
536 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
537 goto on_error;
538 }
539 }
540
c9f52382 541 if (netdev->upscript) {
542 char *argv[] = {
543 "ipvlan",
544 netdev->link,
545 NULL,
546 };
547
548 err = run_script_argv(handler->name,
549 handler->conf->hooks_version, "net",
550 netdev->upscript, "up", argv);
551 if (err < 0)
552 goto on_error;
553 }
554
555 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d",
556 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
557
558 return 0;
559
560on_error:
561 lxc_netdev_delete_by_name(peer);
562 return -1;
563}
564
811ef482
CB
565static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
566{
567 char peer[IFNAMSIZ];
568 int err;
569 static uint16_t vlan_cntr = 0;
570 unsigned int mtu = 0;
571
de4855a8 572 if (netdev->link[0] == '\0') {
811ef482
CB
573 ERROR("No link for vlan network device specified");
574 return -1;
575 }
576
577 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
578 if (err < 0 || (size_t)err >= sizeof(peer))
579 return -1;
580
581 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
582 if (err) {
6d1400b5 583 errno = -err;
584 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
585 peer, netdev->link);
811ef482
CB
586 return -1;
587 }
588
589 netdev->ifindex = if_nametoindex(peer);
590 if (!netdev->ifindex) {
591 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 592 goto on_error;
593 }
594
595 if (netdev->mtu) {
596 err = lxc_safe_uint(netdev->mtu, &mtu);
597 if (err < 0) {
598 errno = -err;
599 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
600 goto on_error;
601 }
602
603 err = lxc_netdev_set_mtu(peer, mtu);
604 if (err) {
605 errno = -err;
606 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
607 goto on_error;
608 }
811ef482
CB
609 }
610
3a73d9f1 611 if (netdev->upscript) {
612 char *argv[] = {
613 "vlan",
614 netdev->link,
615 NULL,
616 };
617
618 err = run_script_argv(handler->name,
619 handler->conf->hooks_version, "net",
620 netdev->upscript, "up", argv);
19abca58 621 if (err < 0) {
3e2a7b08 622 goto on_error;
19abca58 623 }
3a73d9f1 624 }
625
3bef7b7b 626 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
811ef482 627 peer, netdev->ifindex);
811ef482
CB
628
629 return 0;
3e2a7b08 630
631on_error:
632 lxc_netdev_delete_by_name(peer);
633 return -1;
811ef482
CB
634}
635
636static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
637{
0b154989 638 int err, mtu_orig = 0;
3bef7b7b 639 unsigned int mtu = 0;
14a7b0f9 640
de4855a8 641 if (netdev->link[0] == '\0') {
811ef482
CB
642 ERROR("No link for physical interface specified");
643 return -1;
644 }
645
790255cf
CB
646 /* Note that we're retrieving the container's ifindex in the host's
647 * network namespace because we need it to move the device from the
648 * host's network namespace to the container's network namespace later
649 * on.
650 * Note that netdev->link will contain the name of the physical network
651 * device in the host's namespace.
652 */
811ef482
CB
653 netdev->ifindex = if_nametoindex(netdev->link);
654 if (!netdev->ifindex) {
655 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
656 return -1;
657 }
658
790255cf
CB
659 /* Store the ifindex of the host's network device in the host's
660 * namespace.
661 */
662 netdev->priv.phys_attr.ifindex = netdev->ifindex;
663
0b154989
TP
664 /* Get original device MTU setting and store for restoration after container shutdown. */
665 mtu_orig = netdev_get_mtu(netdev->ifindex);
666 if (mtu_orig < 0) {
667 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
668 return minus_one_set_errno(-mtu_orig);
669 }
670
671 netdev->priv.phys_attr.mtu = mtu_orig;
672
3bef7b7b
TP
673 if (netdev->mtu) {
674 err = lxc_safe_uint(netdev->mtu, &mtu);
675 if (err < 0) {
676 errno = -err;
677 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
678 return -1;
679 }
14a7b0f9 680
3bef7b7b
TP
681 err = lxc_netdev_set_mtu(netdev->link, mtu);
682 if (err < 0) {
683 errno = -err;
684 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
685 return -1;
686 }
687 }
688
689 if (netdev->upscript) {
690 char *argv[] = {
691 "phys",
692 netdev->link,
693 NULL,
694 };
695
696 err = run_script_argv(handler->name,
697 handler->conf->hooks_version, "net",
698 netdev->upscript, "up", argv);
699 if (err < 0) {
700 return -1;
701 }
702 }
703
704 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
811ef482
CB
705
706 return 0;
707}
708
709static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
710{
14a7b0f9
CB
711 int ret;
712 char *argv[] = {
713 "empty",
714 NULL,
715 };
716
811ef482 717 netdev->ifindex = 0;
14a7b0f9
CB
718 if (!netdev->upscript)
719 return 0;
720
721 ret = run_script_argv(handler->name, handler->conf->hooks_version,
722 "net", netdev->upscript, "up", argv);
723 if (ret < 0)
724 return -1;
725
811ef482
CB
726 return 0;
727}
728
729static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
730{
731 netdev->ifindex = 0;
732 return 0;
733}
734
735static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
736 [LXC_NET_VETH] = instantiate_veth,
737 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 738 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
739 [LXC_NET_VLAN] = instantiate_vlan,
740 [LXC_NET_PHYS] = instantiate_phys,
741 [LXC_NET_EMPTY] = instantiate_empty,
742 [LXC_NET_NONE] = instantiate_none,
743};
744
745static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
746{
14a7b0f9
CB
747 int ret;
748 char *argv[] = {
749 "veth",
750 netdev->link,
751 NULL,
752 NULL,
753 };
754
755 if (!netdev->downscript)
756 return 0;
811ef482 757
de4855a8 758 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 759 argv[2] = netdev->priv.veth_attr.pair;
811ef482 760 else
14a7b0f9
CB
761 argv[2] = netdev->priv.veth_attr.veth1;
762
763 ret = run_script_argv(handler->name,
764 handler->conf->hooks_version, "net",
765 netdev->downscript, "down", argv);
766 if (ret < 0)
767 return -1;
811ef482 768
811ef482
CB
769 return 0;
770}
771
772static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
773{
14a7b0f9
CB
774 int ret;
775 char *argv[] = {
776 "macvlan",
777 netdev->link,
778 NULL,
779 };
780
781 if (!netdev->downscript)
782 return 0;
783
784 ret = run_script_argv(handler->name, handler->conf->hooks_version,
785 "net", netdev->downscript, "down", argv);
786 if (ret < 0)
787 return -1;
811ef482 788
811ef482
CB
789 return 0;
790}
791
c9f52382 792static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
793{
794 int ret;
795 char *argv[] = {
796 "ipvlan",
797 netdev->link,
798 NULL,
799 };
800
801 if (!netdev->downscript)
802 return 0;
803
804 ret = run_script_argv(handler->name, handler->conf->hooks_version,
805 "net", netdev->downscript, "down", argv);
806 if (ret < 0)
807 return -1;
808
809 return 0;
810}
811
811ef482
CB
812static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
813{
3a73d9f1 814 int ret;
815 char *argv[] = {
816 "vlan",
817 netdev->link,
818 NULL,
819 };
820
821 if (!netdev->downscript)
822 return 0;
823
824 ret = run_script_argv(handler->name, handler->conf->hooks_version,
825 "net", netdev->downscript, "down", argv);
826 if (ret < 0)
827 return -1;
828
811ef482
CB
829 return 0;
830}
831
832static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
833{
14a7b0f9
CB
834 int ret;
835 char *argv[] = {
836 "phys",
837 netdev->link,
838 NULL,
839 };
840
841 if (!netdev->downscript)
842 return 0;
843
844 ret = run_script_argv(handler->name, handler->conf->hooks_version,
845 "net", netdev->downscript, "down", argv);
846 if (ret < 0)
847 return -1;
811ef482 848
811ef482
CB
849 return 0;
850}
851
852static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
853{
14a7b0f9
CB
854 int ret;
855 char *argv[] = {
856 "empty",
857 NULL,
858 };
859
860 if (!netdev->downscript)
861 return 0;
862
863 ret = run_script_argv(handler->name, handler->conf->hooks_version,
864 "net", netdev->downscript, "down", argv);
865 if (ret < 0)
866 return -1;
811ef482 867
811ef482
CB
868 return 0;
869}
870
/* "none" network type: nothing to tear down. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
875
876static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
877 [LXC_NET_VETH] = shutdown_veth,
878 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 879 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
880 [LXC_NET_VLAN] = shutdown_vlan,
881 [LXC_NET_PHYS] = shutdown_phys,
882 [LXC_NET_EMPTY] = shutdown_empty,
883 [LXC_NET_NONE] = shutdown_none,
884};
885
0037ab49
TP
886static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
887{
888 int err;
889 struct nl_handler nlh;
890 struct ifinfomsg *ifi;
891 struct nlmsg *nlmsg = NULL;
892
893 err = netlink_open(&nlh, NETLINK_ROUTE);
894 if (err)
895 return err;
896
897 err = -ENOMEM;
898 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
899 if (!nlmsg)
900 goto out;
901
902 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
903 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
904
905 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
906 if (!ifi)
907 goto out;
908 ifi->ifi_family = AF_UNSPEC;
909 ifi->ifi_index = ifindex;
910
911 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
912 goto out;
913
914 if (ifname != NULL) {
915 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
916 goto out;
917 }
918
919 err = netlink_transaction(&nlh, nlmsg, nlmsg);
920out:
921 netlink_close(&nlh);
922 nlmsg_free(nlmsg);
923 return err;
924}
925
ebc73a67 926int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 927{
ebc73a67 928 int err;
0ad19a3f 929 struct nl_handler nlh;
06f976ca 930 struct ifinfomsg *ifi;
ebc73a67 931 struct nlmsg *nlmsg = NULL;
0ad19a3f 932
3cfc0f3a
MN
933 err = netlink_open(&nlh, NETLINK_ROUTE);
934 if (err)
935 return err;
0ad19a3f 936
3cfc0f3a 937 err = -ENOMEM;
0ad19a3f 938 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
939 if (!nlmsg)
940 goto out;
941
ebc73a67 942 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
943 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
944
945 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
946 if (!ifi)
947 goto out;
06f976ca
SZ
948 ifi->ifi_family = AF_UNSPEC;
949 ifi->ifi_index = ifindex;
0ad19a3f 950
951 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
952 goto out;
953
8d357196
DY
954 if (ifname != NULL) {
955 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
956 goto out;
957 }
958
3cfc0f3a 959 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 960out:
961 netlink_close(&nlh);
962 nlmsg_free(nlmsg);
963 return err;
964}
965
ebc73a67
CB
966/* If we are asked to move a wireless interface, then we must actually move its
967 * phyN device. Detect that condition and return the physname here. The physname
968 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
969 */
970#define PHYSNAME "/sys/class/net/%s/phy80211/name"
ebc73a67 971static char *is_wlan(const char *ifname)
e5848d39 972{
b0293710 973 __do_free char *path = NULL;
ebc73a67 974 int i, ret;
e5848d39 975 long physlen;
ebc73a67 976 size_t len;
e5848d39 977 FILE *f;
ebc73a67 978 char *physname = NULL;
e5848d39 979
ebc73a67 980 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 981 path = must_realloc(NULL, len + 1);
e5848d39 982 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 983 if (ret < 0 || (size_t)ret >= len)
e5848d39 984 goto bad;
ebc73a67 985
ebc73a67
CB
986 f = fopen(path, "r");
987 if (!f)
e5848d39 988 goto bad;
ebc73a67 989
1a0e70ac 990 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
991 fseek(f, 0, SEEK_END);
992 physlen = ftell(f);
993 fseek(f, 0, SEEK_SET);
7d1cde93
SX
994 if (physlen < 0) {
995 fclose(f);
0382c0da 996 goto bad;
7d1cde93 997 }
ebc73a67
CB
998
999 physname = malloc(physlen + 1);
ee54ea9a 1000 if (!physname) {
acf47e1b 1001 fclose(f);
e5848d39 1002 goto bad;
ee54ea9a 1003 }
ebc73a67
CB
1004
1005 memset(physname, 0, physlen + 1);
e5848d39
SH
1006 ret = fread(physname, 1, physlen, f);
1007 fclose(f);
1008 if (ret < 0)
1009 goto bad;
1010
ebc73a67 1011 for (i = 0; i < physlen; i++) {
e5848d39
SH
1012 if (physname[i] == '\n')
1013 physname[i] = '\0';
ebc73a67 1014
e5848d39
SH
1015 if (physname[i] == '\0')
1016 break;
1017 }
1018
1019 return physname;
1020
1021bad:
f10fad2f 1022 free(physname);
e5848d39
SH
1023 return NULL;
1024}
1025
ebc73a67
CB
/*
 * Rename the network device @old to @new inside the network namespace of the
 * process @pid.
 *
 * The rename runs in a forked child which first switches into the target
 * namespace; the parent only waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child from here on. It must terminate with _exit() on every path:
	 * returning would let it continue executing the caller's code as a
	 * second copy of the parent.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1043
ebc73a67
CB
/*
 * Move the wireless phy @physname into the network namespace of @pid by
 * running "iw phy <physname> set netns <pid>" in a child process, then, if
 * @newname is given, rename @ifname to @newname inside that namespace.
 *
 * Takes ownership of @physname and frees it on every path.
 *
 * Returns 0 on success (or the result of the rename step), -1 if "iw" is not
 * found on the path, fork() fails or the "iw" child does not succeed.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	char *iw_path;
	pid_t child;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto done;
	/* Only the presence of the binary matters; execlp() resolves it again. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto done;

	if (child == 0) {
		char pidstr[30];

		snprintf(pidstr, sizeof(pidstr), "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto done;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

done:
	free(physname);
	return ret;
}
1083
/*
 * Move the device named @ifname into the network namespace of @pid, optionally
 * renaming it to @newname.
 *
 * Devices for which is_wlan() returns a phy name are moved through
 * lxc_netdev_move_wlan(); everything else goes through
 * lxc_netdev_move_by_index().
 *
 * Returns -EINVAL if @ifname is NULL or does not resolve to an ifindex,
 * otherwise the result of the helper that was used.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (!ifname)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (!ifindex)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() frees phy. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1102
b84f58b9 1103int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1104{
b84f58b9 1105 int err;
ebc73a67
CB
1106 struct ifinfomsg *ifi;
1107 struct nl_handler nlh;
1108 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1109
3cfc0f3a
MN
1110 err = netlink_open(&nlh, NETLINK_ROUTE);
1111 if (err)
1112 return err;
0ad19a3f 1113
3cfc0f3a 1114 err = -ENOMEM;
0ad19a3f 1115 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1116 if (!nlmsg)
1117 goto out;
1118
06f976ca 1119 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1120 if (!answer)
1121 goto out;
1122
ebc73a67 1123 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1124 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1125
1126 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1127 if (!ifi)
1128 goto out;
06f976ca
SZ
1129 ifi->ifi_family = AF_UNSPEC;
1130 ifi->ifi_index = ifindex;
0ad19a3f 1131
3cfc0f3a 1132 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1133out:
1134 netlink_close(&nlh);
1135 nlmsg_free(answer);
1136 nlmsg_free(nlmsg);
1137 return err;
1138}
1139
b84f58b9
DL
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if @name does not resolve to an ifindex, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	return ifindex ? lxc_netdev_delete_by_index(ifindex) : -EINVAL;
}
1150
1151int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1152{
ebc73a67 1153 int err, len;
06f976ca 1154 struct ifinfomsg *ifi;
ebc73a67
CB
1155 struct nl_handler nlh;
1156 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1157
3cfc0f3a
MN
1158 err = netlink_open(&nlh, NETLINK_ROUTE);
1159 if (err)
1160 return err;
b9a5bb58 1161
b84f58b9 1162 len = strlen(newname);
90d79629
CB
1163 if (len == 1 || len >= IFNAMSIZ) {
1164 err = -EINVAL;
b84f58b9 1165 goto out;
90d79629 1166 }
b84f58b9 1167
3cfc0f3a 1168 err = -ENOMEM;
b9a5bb58
DL
1169 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1170 if (!nlmsg)
1171 goto out;
1172
06f976ca 1173 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1174 if (!answer)
1175 goto out;
1176
ebc73a67 1177 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1178 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1179
1180 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1181 if (!ifi)
1182 goto out;
06f976ca
SZ
1183 ifi->ifi_family = AF_UNSPEC;
1184 ifi->ifi_index = ifindex;
b84f58b9
DL
1185
1186 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1187 goto out;
b9a5bb58 1188
3cfc0f3a 1189 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1190out:
1191 netlink_close(&nlh);
1192 nlmsg_free(answer);
1193 nlmsg_free(nlmsg);
1194 return err;
1195}
1196
b84f58b9
DL
1197int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1198{
1199 int len, index;
1200
1201 len = strlen(oldname);
dae3fdf6 1202 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1203 return -EINVAL;
1204
1205 index = if_nametoindex(oldname);
1206 if (!index)
1207 return -EINVAL;
1208
1209 return lxc_netdev_rename_by_index(index, newname);
1210}
1211
8befa924 1212int netdev_set_flag(const char *name, int flag)
0ad19a3f 1213{
ebc73a67 1214 int err, index, len;
06f976ca 1215 struct ifinfomsg *ifi;
ebc73a67
CB
1216 struct nl_handler nlh;
1217 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1218
3cfc0f3a
MN
1219 err = netlink_open(&nlh, NETLINK_ROUTE);
1220 if (err)
1221 return err;
0ad19a3f 1222
3cfc0f3a 1223 err = -EINVAL;
0ad19a3f 1224 len = strlen(name);
dae3fdf6 1225 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1226 goto out;
1227
3cfc0f3a 1228 err = -ENOMEM;
0ad19a3f 1229 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1230 if (!nlmsg)
1231 goto out;
1232
06f976ca 1233 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1234 if (!answer)
1235 goto out;
1236
3cfc0f3a 1237 err = -EINVAL;
0ad19a3f 1238 index = if_nametoindex(name);
1239 if (!index)
1240 goto out;
1241
ebc73a67 1242 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1243 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1244
1245 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1246 if (!ifi) {
1247 err = -ENOMEM;
1248 goto out;
1249 }
06f976ca
SZ
1250 ifi->ifi_family = AF_UNSPEC;
1251 ifi->ifi_index = index;
1252 ifi->ifi_change |= IFF_UP;
1253 ifi->ifi_flags |= flag;
0ad19a3f 1254
1255 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1256out:
1257 netlink_close(&nlh);
1258 nlmsg_free(nlmsg);
1259 nlmsg_free(answer);
1260 return err;
1261}
1262
ebc73a67 1263int netdev_get_flag(const char *name, int *flag)
efa1cf45 1264{
ebc73a67 1265 int err, index, len;
a4318300 1266 struct ifinfomsg *ifi;
ebc73a67
CB
1267 struct nl_handler nlh;
1268 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1269
1270 if (!name)
1271 return -EINVAL;
1272
1273 err = netlink_open(&nlh, NETLINK_ROUTE);
1274 if (err)
1275 return err;
1276
1277 err = -EINVAL;
1278 len = strlen(name);
1279 if (len == 1 || len >= IFNAMSIZ)
1280 goto out;
1281
1282 err = -ENOMEM;
1283 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1284 if (!nlmsg)
1285 goto out;
1286
06f976ca 1287 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1288 if (!answer)
1289 goto out;
1290
1291 err = -EINVAL;
1292 index = if_nametoindex(name);
1293 if (!index)
1294 goto out;
1295
06f976ca
SZ
1296 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1297 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1298
1299 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1300 if (!ifi) {
1301 err = -ENOMEM;
1302 goto out;
1303 }
06f976ca
SZ
1304 ifi->ifi_family = AF_UNSPEC;
1305 ifi->ifi_index = index;
efa1cf45
DY
1306
1307 err = netlink_transaction(&nlh, nlmsg, answer);
1308 if (err)
1309 goto out;
1310
06f976ca 1311 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1312
1313 *flag = ifi->ifi_flags;
1314out:
1315 netlink_close(&nlh);
1316 nlmsg_free(nlmsg);
1317 nlmsg_free(answer);
1318 return err;
1319}
1320
1321/*
1322 * \brief Check a interface is up or not.
1323 *
1324 * \param name: name for the interface.
1325 *
1326 * \return int.
1327 * 0 means interface is down.
1328 * 1 means interface is up.
1329 * Others means error happened, and ret-value is the error number.
1330 */
ebc73a67 1331int lxc_netdev_isup(const char *name)
efa1cf45 1332{
ebc73a67 1333 int err, flag;
efa1cf45
DY
1334
1335 err = netdev_get_flag(name, &flag);
1336 if (err)
ebc73a67
CB
1337 return err;
1338
efa1cf45
DY
1339 if (flag & IFF_UP)
1340 return 1;
ebc73a67 1341
efa1cf45 1342 return 0;
efa1cf45
DY
1343}
1344
0130df54
SH
1345int netdev_get_mtu(int ifindex)
1346{
ebc73a67 1347 int answer_len, err, res;
0130df54 1348 struct nl_handler nlh;
06f976ca 1349 struct ifinfomsg *ifi;
0130df54 1350 struct nlmsghdr *msg;
ebc73a67
CB
1351 int readmore = 0, recv_len = 0;
1352 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1353
1354 err = netlink_open(&nlh, NETLINK_ROUTE);
1355 if (err)
1356 return err;
1357
1358 err = -ENOMEM;
1359 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1360 if (!nlmsg)
1361 goto out;
1362
06f976ca 1363 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1364 if (!answer)
1365 goto out;
1366
1367 /* Save the answer buffer length, since it will be overwritten
1368 * on the first receive (and we might need to receive more than
ebc73a67
CB
1369 * once.
1370 */
06f976ca
SZ
1371 answer_len = answer->nlmsghdr->nlmsg_len;
1372
ebc73a67 1373 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1374 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1375
06f976ca 1376 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1377 if (!ifi)
1378 goto out;
06f976ca 1379 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1380
1381 /* Send the request for addresses, which returns all addresses
1382 * on all interfaces. */
1383 err = netlink_send(&nlh, nlmsg);
1384 if (err < 0)
1385 goto out;
1386
6ce39620
CB
1387#pragma GCC diagnostic push
1388#pragma GCC diagnostic ignored "-Wcast-align"
1389
0130df54
SH
1390 do {
1391 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1392 * overwritten by a previous receive.
1393 */
06f976ca 1394 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1395
1396 /* Get the (next) batch of reply messages */
1397 err = netlink_rcv(&nlh, answer);
1398 if (err < 0)
1399 goto out;
1400
1401 recv_len = err;
0130df54
SH
1402
1403 /* Satisfy the typing for the netlink macros */
06f976ca 1404 msg = answer->nlmsghdr;
0130df54
SH
1405
1406 while (NLMSG_OK(msg, recv_len)) {
1407
1408 /* Stop reading if we see an error message */
1409 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1410 struct nlmsgerr *errmsg =
1411 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1412 err = errmsg->error;
1413 goto out;
1414 }
1415
1416 /* Stop reading if we see a NLMSG_DONE message */
1417 if (msg->nlmsg_type == NLMSG_DONE) {
1418 readmore = 0;
1419 break;
1420 }
1421
06f976ca 1422 ifi = NLMSG_DATA(msg);
0130df54
SH
1423 if (ifi->ifi_index == ifindex) {
1424 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1425 int attr_len =
1426 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1427 res = 0;
ebc73a67
CB
1428 while (RTA_OK(rta, attr_len)) {
1429 /* Found a local address for the
1430 * requested interface, return it.
1431 */
0130df54 1432 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1433 memcpy(&res, RTA_DATA(rta),
1434 sizeof(int));
0130df54
SH
1435 err = res;
1436 goto out;
1437 }
1438 rta = RTA_NEXT(rta, attr_len);
1439 }
0130df54
SH
1440 }
1441
ebc73a67
CB
1442 /* Keep reading more data from the socket if the last
1443 * message had the NLF_F_MULTI flag set.
1444 */
0130df54
SH
1445 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1446
ebc73a67 1447 /* Look at the next message received in this buffer. */
0130df54
SH
1448 msg = NLMSG_NEXT(msg, recv_len);
1449 }
1450 } while (readmore);
1451
6ce39620
CB
1452#pragma GCC diagnostic pop
1453
ebc73a67 1454 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1455 err = -1;
1456
1457out:
1458 netlink_close(&nlh);
1459 nlmsg_free(answer);
1460 nlmsg_free(nlmsg);
1461 return err;
1462}
1463
d472214b 1464int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1465{
ebc73a67 1466 int err, index, len;
06f976ca 1467 struct ifinfomsg *ifi;
ebc73a67
CB
1468 struct nl_handler nlh;
1469 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1470
3cfc0f3a
MN
1471 err = netlink_open(&nlh, NETLINK_ROUTE);
1472 if (err)
1473 return err;
75d09f83 1474
3cfc0f3a 1475 err = -EINVAL;
75d09f83 1476 len = strlen(name);
dae3fdf6 1477 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1478 goto out;
1479
3cfc0f3a 1480 err = -ENOMEM;
75d09f83
DL
1481 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1482 if (!nlmsg)
1483 goto out;
1484
06f976ca 1485 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1486 if (!answer)
1487 goto out;
1488
3cfc0f3a 1489 err = -EINVAL;
75d09f83
DL
1490 index = if_nametoindex(name);
1491 if (!index)
1492 goto out;
1493
ebc73a67 1494 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1495 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1496
1497 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1498 if (!ifi) {
1499 err = -ENOMEM;
1500 goto out;
1501 }
06f976ca
SZ
1502 ifi->ifi_family = AF_UNSPEC;
1503 ifi->ifi_index = index;
75d09f83
DL
1504
1505 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1506 goto out;
1507
1508 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1509out:
1510 netlink_close(&nlh);
1511 nlmsg_free(nlmsg);
1512 nlmsg_free(answer);
1513 return err;
1514}
1515
d472214b 1516int lxc_netdev_up(const char *name)
0ad19a3f 1517{
d472214b 1518 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1519}
1520
/* Take the interface called name down by calling netdev_set_flag() with no
 * flag bits set; returns what netdev_set_flag() returns.
 */
int lxc_netdev_down(const char *name)
{
	const int no_flags = 0;

	return netdev_set_flag(name, no_flags);
}
1525
497353b6 1526int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1527{
ebc73a67 1528 int err, len;
06f976ca 1529 struct ifinfomsg *ifi;
ebc73a67 1530 struct nl_handler nlh;
0ad19a3f 1531 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1532 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1533
3cfc0f3a
MN
1534 err = netlink_open(&nlh, NETLINK_ROUTE);
1535 if (err)
1536 return err;
0ad19a3f 1537
3cfc0f3a 1538 err = -EINVAL;
0ad19a3f 1539 len = strlen(name1);
dae3fdf6 1540 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1541 goto out;
1542
1543 len = strlen(name2);
dae3fdf6 1544 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1545 goto out;
1546
3cfc0f3a 1547 err = -ENOMEM;
0ad19a3f 1548 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1549 if (!nlmsg)
1550 goto out;
1551
06f976ca 1552 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1553 if (!answer)
1554 goto out;
1555
06f976ca 1556 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1557 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1558 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1559
1560 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1561 if (!ifi)
1562 goto out;
06f976ca 1563 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1564
3cfc0f3a 1565 err = -EINVAL;
79e68309 1566 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1567 if (!nest1)
1568 goto out;
1569
1570 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1571 goto out;
1572
1573 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1574 if (!nest2)
1575 goto out;
1576
1577 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1578 if (!nest3)
1579 goto out;
1580
06f976ca 1581 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1582 if (!ifi) {
1583 err = -ENOMEM;
06f976ca 1584 goto out;
25a9939b 1585 }
0ad19a3f 1586
1587 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1588 goto out;
1589
1590 nla_end_nested(nlmsg, nest3);
0ad19a3f 1591 nla_end_nested(nlmsg, nest2);
0ad19a3f 1592 nla_end_nested(nlmsg, nest1);
1593
1594 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1595 goto out;
1596
3cfc0f3a 1597 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1598out:
1599 netlink_close(&nlh);
1600 nlmsg_free(answer);
1601 nlmsg_free(nlmsg);
1602 return err;
1603}
1604
ebc73a67 1605/* TODO: merge with lxc_macvlan_create */
7c11d57a 1606int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1607{
ebc73a67 1608 int err, len, lindex;
06f976ca 1609 struct ifinfomsg *ifi;
ebc73a67 1610 struct nl_handler nlh;
26c39028 1611 struct rtattr *nest, *nest2;
ebc73a67 1612 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1613
3cfc0f3a
MN
1614 err = netlink_open(&nlh, NETLINK_ROUTE);
1615 if (err)
1616 return err;
26c39028 1617
3cfc0f3a 1618 err = -EINVAL;
26c39028 1619 len = strlen(master);
dae3fdf6 1620 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1621 goto err3;
1622
1623 len = strlen(name);
dae3fdf6 1624 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1625 goto err3;
1626
3cfc0f3a 1627 err = -ENOMEM;
26c39028
JHS
1628 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1629 if (!nlmsg)
1630 goto err3;
1631
06f976ca 1632 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1633 if (!answer)
1634 goto err2;
1635
3cfc0f3a 1636 err = -EINVAL;
26c39028
JHS
1637 lindex = if_nametoindex(master);
1638 if (!lindex)
1639 goto err1;
1640
06f976ca 1641 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1642 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1643 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1644
1645 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1646 if (!ifi) {
1647 err = -ENOMEM;
1648 goto err1;
1649 }
06f976ca 1650 ifi->ifi_family = AF_UNSPEC;
26c39028 1651
79e68309 1652 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1653 if (!nest)
1654 goto err1;
1655
1656 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1657 goto err1;
1658
1659 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1660 if (!nest2)
1661 goto err1;
e892973e 1662
26c39028
JHS
1663 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1664 goto err1;
e892973e 1665
26c39028 1666 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1667 nla_end_nested(nlmsg, nest);
1668
1669 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1670 goto err1;
1671
1672 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1673 goto err1;
1674
3cfc0f3a 1675 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1676err1:
1677 nlmsg_free(answer);
1678err2:
1679 nlmsg_free(nlmsg);
1680err3:
1681 netlink_close(&nlh);
1682 return err;
1683}
1684
e892973e 1685int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1686{
ebc73a67 1687 int err, index, len;
06f976ca 1688 struct ifinfomsg *ifi;
ebc73a67 1689 struct nl_handler nlh;
e892973e 1690 struct rtattr *nest, *nest2;
ebc73a67 1691 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1692
3cfc0f3a
MN
1693 err = netlink_open(&nlh, NETLINK_ROUTE);
1694 if (err)
1695 return err;
0ad19a3f 1696
3cfc0f3a 1697 err = -EINVAL;
0ad19a3f 1698 len = strlen(master);
dae3fdf6 1699 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1700 goto out;
1701
1702 len = strlen(name);
dae3fdf6 1703 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1704 goto out;
1705
3cfc0f3a 1706 err = -ENOMEM;
0ad19a3f 1707 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1708 if (!nlmsg)
1709 goto out;
1710
06f976ca 1711 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1712 if (!answer)
1713 goto out;
1714
3cfc0f3a 1715 err = -EINVAL;
0ad19a3f 1716 index = if_nametoindex(master);
1717 if (!index)
1718 goto out;
1719
06f976ca 1720 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1721 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1722 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1723
1724 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1725 if (!ifi) {
1726 err = -ENOMEM;
1727 goto out;
1728 }
06f976ca 1729 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1730
79e68309 1731 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1732 if (!nest)
1733 goto out;
1734
1735 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1736 goto out;
1737
e892973e
DL
1738 if (mode) {
1739 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1740 if (!nest2)
1741 goto out;
1742
1743 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1744 goto out;
1745
1746 nla_end_nested(nlmsg, nest2);
1747 }
1748
0ad19a3f 1749 nla_end_nested(nlmsg, nest);
1750
1751 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1752 goto out;
1753
1754 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1755 goto out;
1756
3cfc0f3a 1757 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1758out:
1759 netlink_close(&nlh);
1760 nlmsg_free(answer);
1761 nlmsg_free(nlmsg);
1762 return err;
1763}
1764
/* Write the string value (without its terminating NUL) to the file at path.
 *
 * Returns 0 on success, or -errno if the file cannot be opened for writing
 * or the write fails.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret, fd;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to change it. */
	ret = lxc_write_nointr(fd, value, strlen(value)) < 0 ? -errno : 0;

	close(fd);
	return ret;
}
1780
6509154d 1781static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1782{
1783 int ret;
1784 char path[PATH_MAX];
1785 char buf[1] = "";
1786
1787 if (family != AF_INET && family != AF_INET6)
1788 return minus_one_set_errno(EINVAL);
1789
1790 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1791 family == AF_INET ? "ipv4" : "ipv6", ifname,
1792 "forwarding");
1793 if (ret < 0 || (size_t)ret >= PATH_MAX)
1794 return minus_one_set_errno(E2BIG);
1795
1796 return lxc_read_file_expect(path, buf, 1, "1");
1797}
1798
0ad19a3f 1799static int neigh_proxy_set(const char *ifname, int family, int flag)
1800{
9ba8130c 1801 int ret;
419590da 1802 char path[PATH_MAX];
0ad19a3f 1803
1804 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1805 return -EINVAL;
0ad19a3f 1806
419590da 1807 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1808 family == AF_INET ? "ipv4" : "ipv6", ifname,
1809 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 1810 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 1811 return -E2BIG;
0ad19a3f 1812
ebc73a67 1813 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1814}
1815
6509154d 1816static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1817{
1818 int ret;
1819 char path[PATH_MAX];
1820 char buf[1] = "";
1821
1822 if (family != AF_INET && family != AF_INET6)
1823 return minus_one_set_errno(EINVAL);
1824
1825 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1826 family == AF_INET ? "ipv4" : "ipv6", ifname,
1827 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1828 if (ret < 0 || (size_t)ret >= PATH_MAX)
1829 return minus_one_set_errno(E2BIG);
1830
1831 return lxc_read_file_expect(path, buf, 1, "1");
1832}
1833
/* Enable neighbour proxying on interface name for the given address family;
 * returns what neigh_proxy_set() returns.
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1838
/* Disable neighbour proxying on interface name for the given address
 * family; returns what neigh_proxy_set() returns.
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
1843
/* Return the value (0-15) of the hexadecimal digit c, or -1 if c is not a
 * hexadecimal digit.
 */
static int lxc_mac_hex_value(char c)
{
	/* <ctype.h> functions are only defined for unsigned char values (and
	 * EOF), so a plain char must be converted before the call.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address macaddr into sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are written to
 * sa_data. Each octet is one or two hexadecimal digits; octets are separated
 * by ':'. Parsing stops at the end of the string or after ETH_ALEN octets,
 * whichever comes first, so shorter addresses fill only the leading bytes
 * and anything after the last octet is ignored.
 *
 * Returns 0 on success and -EINVAL as soon as an unexpected character is
 * found (octets parsed before that point have already been stored).
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned val;
		int digit;
		char c;

		/* First digit of the octet is mandatory. */
		digit = lxc_mac_hex_value(*macaddr++);
		if (digit < 0)
			return -EINVAL;
		val = (unsigned)digit;

		/* Second digit is optional: a ':' or the end of the string
		 * right after the first digit makes it a one-digit octet.
		 */
		c = *macaddr;
		digit = lxc_mac_hex_value(c);
		if (digit >= 0)
			val = (val << 4) | (unsigned)digit;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator following a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1888
ebc73a67
CB
1889static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1890 void *acast, int prefix)
0ad19a3f 1891{
ebc73a67 1892 int addrlen, err;
06f976ca 1893 struct ifaddrmsg *ifa;
ebc73a67
CB
1894 struct nl_handler nlh;
1895 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1896
ebc73a67
CB
1897 addrlen = family == AF_INET ? sizeof(struct in_addr)
1898 : sizeof(struct in6_addr);
4bf1968d 1899
3cfc0f3a
MN
1900 err = netlink_open(&nlh, NETLINK_ROUTE);
1901 if (err)
1902 return err;
0ad19a3f 1903
3cfc0f3a 1904 err = -ENOMEM;
0ad19a3f 1905 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1906 if (!nlmsg)
1907 goto out;
1908
06f976ca 1909 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1910 if (!answer)
1911 goto out;
1912
06f976ca 1913 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1914 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1915 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1916
1917 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1918 if (!ifa)
25a9939b 1919 goto out;
06f976ca
SZ
1920 ifa->ifa_prefixlen = prefix;
1921 ifa->ifa_index = ifindex;
1922 ifa->ifa_family = family;
1923 ifa->ifa_scope = 0;
acf47e1b 1924
3cfc0f3a 1925 err = -EINVAL;
4bf1968d 1926 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1927 goto out;
1928
4bf1968d 1929 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1930 goto out;
1931
d8948a52 1932 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1933 goto out;
1934
ebc73a67 1935 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1936 err = -EPROTONOSUPPORT;
79881dc6
DL
1937 if (family == AF_INET6 &&
1938 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1939 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1940 goto out;
0ad19a3f 1941
3cfc0f3a 1942 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1943out:
1944 netlink_close(&nlh);
1945 nlmsg_free(answer);
1946 nlmsg_free(nlmsg);
1947 return err;
1948}
1949
/* Add the IPv6 address addr/prefix to the interface with index ifindex.
 * mcast is handed to ip_addr_add() in the broadcast slot and acast in the
 * anycast slot; returns what ip_addr_add() returns.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
1956
ebc73a67
CB
/* Add the IPv4 address addr/prefix with broadcast address bcast to the
 * interface with index ifindex (no anycast address is passed); returns what
 * ip_addr_add() returns.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
1962
ebc73a67
CB
/* Extract an address from the address message msg.
 *
 * The attributes are scanned in order; the value of every IFA_ADDRESS or
 * IFA_LOCAL attribute is copied into *res, and the scan stops at the first
 * IFA_LOCAL, so an IFA_LOCAL takes precedence over an IFA_ADDRESS seen
 * before it. If *res is NULL, memory for the address is allocated and stored
 * there (so res should be an in_addr** or in6_addr**).
 *
 * Returns 0 when the message belongs to a different family (nothing is
 * touched) or after the scan completed, and -1 if an attribute has an
 * unexpected length or the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addrmsg = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (addrmsg->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	for (attr = IFA_RTA(addrmsg); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check should already guarantee the length, but
		 * verify it before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* Reuse the buffer if an earlier attribute already filled
		 * it; its content is simply overwritten.
		 */
		if (*res == NULL) {
			*res = malloc(addrlen);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2016
19a26f82
MK
2017static int ip_addr_get(int family, int ifindex, void **res)
2018{
ebc73a67 2019 int answer_len, err;
06f976ca 2020 struct ifaddrmsg *ifa;
ebc73a67 2021 struct nl_handler nlh;
19a26f82 2022 struct nlmsghdr *msg;
ebc73a67
CB
2023 int readmore = 0, recv_len = 0;
2024 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2025
2026 err = netlink_open(&nlh, NETLINK_ROUTE);
2027 if (err)
2028 return err;
2029
2030 err = -ENOMEM;
2031 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2032 if (!nlmsg)
2033 goto out;
2034
06f976ca 2035 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2036 if (!answer)
2037 goto out;
2038
ebc73a67
CB
2039 /* Save the answer buffer length, since it will be overwritten on the
2040 * first receive (and we might need to receive more than once).
2041 */
06f976ca
SZ
2042 answer_len = answer->nlmsghdr->nlmsg_len;
2043
ebc73a67 2044 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2045 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2046
06f976ca 2047 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2048 if (!ifa)
2049 goto out;
06f976ca 2050 ifa->ifa_family = family;
19a26f82 2051
ebc73a67
CB
2052 /* Send the request for addresses, which returns all addresses on all
2053 * interfaces.
2054 */
19a26f82
MK
2055 err = netlink_send(&nlh, nlmsg);
2056 if (err < 0)
2057 goto out;
19a26f82 2058
6ce39620
CB
2059#pragma GCC diagnostic push
2060#pragma GCC diagnostic ignored "-Wcast-align"
2061
19a26f82
MK
2062 do {
2063 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2064 * overwritten by a previous receive.
2065 */
06f976ca 2066 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2067
ebc73a67 2068 /* Get the (next) batch of reply messages. */
19a26f82
MK
2069 err = netlink_rcv(&nlh, answer);
2070 if (err < 0)
2071 goto out;
2072
2073 recv_len = err;
2074 err = 0;
2075
ebc73a67 2076 /* Satisfy the typing for the netlink macros. */
06f976ca 2077 msg = answer->nlmsghdr;
19a26f82
MK
2078
2079 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2080 /* Stop reading if we see an error message. */
19a26f82 2081 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2082 struct nlmsgerr *errmsg =
2083 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2084 err = errmsg->error;
2085 goto out;
2086 }
2087
ebc73a67 2088 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2089 if (msg->nlmsg_type == NLMSG_DONE) {
2090 readmore = 0;
2091 break;
2092 }
2093
2094 if (msg->nlmsg_type != RTM_NEWADDR) {
2095 err = -1;
2096 goto out;
2097 }
2098
06f976ca
SZ
2099 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2100 if (ifa->ifa_index == ifindex) {
2101 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2102 err = -1;
2103 goto out;
2104 }
2105
ebc73a67 2106 /* Found a result, stop searching. */
19a26f82
MK
2107 if (*res)
2108 goto out;
2109 }
2110
ebc73a67
CB
2111 /* Keep reading more data from the socket if the last
2112 * message had the NLF_F_MULTI flag set.
2113 */
19a26f82
MK
2114 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2115
ebc73a67 2116 /* Look at the next message received in this buffer. */
19a26f82
MK
2117 msg = NLMSG_NEXT(msg, recv_len);
2118 }
2119 } while (readmore);
2120
6ce39620
CB
2121#pragma GCC diagnostic pop
2122
19a26f82 2123 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2124 * error.
2125 */
19a26f82
MK
2126 err = -1;
2127
2128out:
2129 netlink_close(&nlh);
2130 nlmsg_free(answer);
2131 nlmsg_free(nlmsg);
2132 return err;
2133}
2134
/* Look up an IPv6 address of the interface with index ifindex and store it
 * in *res; returns what ip_addr_get() returns.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2139
/* Look up an IPv4 address of the interface with index ifindex and store it
 * in *res; returns what ip_addr_get() returns.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2144
f8fee0e2
MK
2145static int ip_gateway_add(int family, int ifindex, void *gw)
2146{
ebc73a67 2147 int addrlen, err;
f8fee0e2 2148 struct nl_handler nlh;
06f976ca 2149 struct rtmsg *rt;
ebc73a67 2150 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2151
ebc73a67
CB
2152 addrlen = family == AF_INET ? sizeof(struct in_addr)
2153 : sizeof(struct in6_addr);
f8fee0e2
MK
2154
2155 err = netlink_open(&nlh, NETLINK_ROUTE);
2156 if (err)
2157 return err;
2158
2159 err = -ENOMEM;
2160 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2161 if (!nlmsg)
2162 goto out;
2163
06f976ca 2164 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2165 if (!answer)
2166 goto out;
2167
06f976ca 2168 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2169 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2170 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2171
2172 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2173 if (!rt)
2174 goto out;
06f976ca
SZ
2175 rt->rtm_family = family;
2176 rt->rtm_table = RT_TABLE_MAIN;
2177 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2178 rt->rtm_protocol = RTPROT_BOOT;
2179 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2180 /* "default" destination */
06f976ca 2181 rt->rtm_dst_len = 0;
f8fee0e2
MK
2182
2183 err = -EINVAL;
a2f9a670 2184
2185 /* If gateway address not supplied, then a device route will be created instead */
2186 if (gw != NULL) {
2187 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2188 goto out;
2189 }
f8fee0e2
MK
2190
2191 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2192 * addresses for the gateway.
2193 */
f8fee0e2
MK
2194 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2195 goto out;
2196
2197 err = netlink_transaction(&nlh, nlmsg, answer);
2198out:
2199 netlink_close(&nlh);
2200 nlmsg_free(answer);
2201 nlmsg_free(nlmsg);
2202 return err;
2203}
2204
/* Add an IPv4 default route via gw on the interface with index ifindex;
 * returns what ip_gateway_add() returns.
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2209
/* Add an IPv6 default route via gw on the interface with index ifindex;
 * returns what ip_gateway_add() returns.
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/* Report whether bridge should be treated as an openvswitch bridge.
 *
 * The decision is made purely from sysfs: the function returns true exactly
 * when stat() on /sys/class/net/<bridge>/bridge fails with ENOENT, i.e. when
 * that directory does not exist. It returns false when the directory exists,
 * when stat() fails for any other reason, or when the path does not fit in
 * the buffer (bridge longer than IFNAMSIZ characters).
 *
 * Note that a name for which no interface exists at all also yields true,
 * since its sysfs directory is missing as well.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* Fixed path pieces + room for an interface name + terminating NUL. */
	char brdirname[sizeof("/sys/class/net//bridge") - 1 + IFNAMSIZ + 1] = {0};
	struct stat sb;
	int ret;

	ret = snprintf(brdirname, sizeof(brdirname), "/sys/class/net/%s/bridge",
		       bridge);
	if (ret < 0 || (size_t)ret >= sizeof(brdirname))
		return false;

	ret = stat(brdirname, &sb);

	return ret < 0 && errno == ENOENT;
}
2231
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks. */
struct ovs_veth_args {
	/* Name of the openvswitch bridge. */
	const char *bridge;
	/* Name of the port (network interface) to add to or remove from it. */
	const char *nic;
};
2236
cb0dc11b
CB
2237/* Called from a background thread - when nic goes away, remove it from the
2238 * bridge.
c43cbc04 2239 */
581c75e7 2240static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2241{
581c75e7 2242 struct ovs_veth_args *args = data;
cb0dc11b 2243
581c75e7
CB
2244 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2245 (char *)NULL);
2246 return -1;
c43cbc04
SH
2247}
2248
581c75e7 2249int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2250{
c43cbc04 2251 int ret;
419590da 2252 char cmd_output[PATH_MAX];
581c75e7 2253 struct ovs_veth_args args;
6ad22d06 2254
581c75e7
CB
2255 args.bridge = bridge;
2256 args.nic = nic;
2257 ret = run_command(cmd_output, sizeof(cmd_output),
2258 lxc_ovs_delete_port_exec, (void *)&args);
2259 if (ret < 0) {
2260 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2261 "%s", bridge, nic, cmd_output);
6ad22d06 2262 return -1;
581c75e7 2263 }
0d204771 2264
581c75e7
CB
2265 return 0;
2266}
ebc73a67 2267
581c75e7
CB
2268static int lxc_ovs_attach_bridge_exec(void *data)
2269{
2270 struct ovs_veth_args *args = data;
ebc73a67 2271
581c75e7
CB
2272 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2273 (char *)NULL);
2274 return -1;
2275}
ebc73a67 2276
581c75e7
CB
2277static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2278{
2279 int ret;
419590da 2280 char cmd_output[PATH_MAX];
581c75e7 2281 struct ovs_veth_args args;
ebc73a67 2282
581c75e7
CB
2283 args.bridge = bridge;
2284 args.nic = nic;
2285 ret = run_command(cmd_output, sizeof(cmd_output),
2286 lxc_ovs_attach_bridge_exec, (void *)&args);
2287 if (ret < 0) {
2288 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2289 bridge, nic, cmd_output);
2290 return -1;
c43cbc04 2291 }
0d204771 2292
581c75e7 2293 return 0;
0d204771 2294}
0d204771 2295
/* Attach the network interface @ifname to the bridge @bridge.
 *
 * When is_ovs_bridge() reports @bridge as an openvswitch bridge the work is
 * delegated to lxc_ovs_attach_bridge(); otherwise the interface is added with
 * the SIOCBRADDIF ioctl.
 *
 * Returns 0 on success, -EINVAL if @ifname is too long or unknown, -E2BIG if
 * @bridge does not fit into an interface name, a negative errno value when
 * socket() or ioctl() fails, or the result of lxc_ovs_attach_bridge().
 */
int lxc_bridge_attach(const char *bridge, const char *ifname)
{
	int err, fd, index;
	size_t retlen;
	struct ifreq ifr = {0};

	if (strlen(ifname) >= IFNAMSIZ)
		return -EINVAL;

	index = if_nametoindex(ifname);
	if (!index)
		return -EINVAL;

	if (is_ovs_bridge(bridge))
		return lxc_ovs_attach_bridge(bridge, ifname);

	fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
	if (fd < 0)
		return -errno;

	retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
	if (retlen >= IFNAMSIZ) {
		close(fd);
		return -E2BIG;
	}

	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	ifr.ifr_ifindex = index;
	err = ioctl(fd, SIOCBRADDIF, &ifr);
	/* Capture errno before close() gets a chance to overwrite it. */
	if (err)
		err = -errno;
	close(fd);

	return err;
}
72d0e1cb 2331
ebc73a67 2332static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2333 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2334 [LXC_NET_VETH] = "veth",
2335 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2336 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2337 [LXC_NET_PHYS] = "phys",
b343592b
BP
2338 [LXC_NET_VLAN] = "vlan",
2339 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2340};
2341
2342const char *lxc_net_type_to_str(int type)
2343{
2344 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2345 return NULL;
ebc73a67 2346
72d0e1cb
SG
2347 return lxc_network_types[type];
2348}
8befa924 2349
ebc73a67 2350static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2351
966e9f1f 2352char *lxc_mkifname(char *template)
a0265685 2353{
2d7bf744 2354 int ret;
b1e44ed1 2355 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2356 char name[IFNAMSIZ];
2357 bool exists = false;
2358 size_t i = 0;
280cc35f 2359#ifdef HAVE_RAND_R
2360 unsigned int seed;
2361
2362 seed = randseed(false);
2363#else
2364
2365 (void)randseed(true);
2366#endif
a0265685 2367
535e8859
CB
2368 if (strlen(template) >= IFNAMSIZ)
2369 return NULL;
2370
ebc73a67 2371 /* Get all the network interfaces. */
b1e44ed1 2372 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2373 if (ret < 0) {
6d1400b5 2374 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2375 return NULL;
2376 }
a0265685 2377
ebc73a67 2378 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2379 for (;;) {
966e9f1f 2380 name[0] = '\0';
94b1cade 2381 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2382
966e9f1f 2383 exists = false;
280cc35f 2384
a0265685
SG
2385 for (i = 0; i < strlen(name); i++) {
2386 if (name[i] == 'X') {
2387#ifdef HAVE_RAND_R
8523344a 2388 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2389#else
8523344a 2390 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2391#endif
2392 }
2393 }
2394
2395 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2396 if (!strcmp(ifa->ifa_name, name)) {
2397 exists = true;
a0265685
SG
2398 break;
2399 }
2400 }
2401
966e9f1f 2402 if (!exists)
a0265685 2403 break;
a0265685
SG
2404 }
2405
b1e44ed1 2406 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2407 (void)strlcpy(template, name, strlen(template) + 1);
2408
2409 return template;
a0265685
SG
2410}
2411
8befa924
SH
/* Overwrite the first byte of the hardware address of interface @veth1 with
 * 0xfe.
 *
 * The current address is read with SIOCGIFHWADDR, its first byte is replaced
 * and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG when @veth1 does not fit into an interface
 * name, or a negative errno value when socket() or one of the ioctls fails.
 */
int setup_private_host_hw_addr(char *veth1)
{
	struct ifreq ifr = {0};
	int err, saved_errno, sockfd;

	/* Validate the name first so no socket is opened for invalid input. */
	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ)
		return -E2BIG;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err >= 0) {
		ifr.ifr_hwaddr.sa_data[0] = 0xfe;
		err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	}

	/* close() may overwrite errno, so remember the ioctl error first. */
	saved_errno = errno;
	close(sockfd);
	if (err < 0)
		return -saved_errno;

	return 0;
}
811ef482
CB
2441
2442int lxc_find_gateway_addresses(struct lxc_handler *handler)
2443{
2444 struct lxc_list *network = &handler->conf->network;
2445 struct lxc_list *iterator;
2446 struct lxc_netdev *netdev;
2447 int link_index;
2448
2449 lxc_list_for_each(iterator, network) {
2450 netdev = iterator->elem;
2451
2452 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2453 continue;
2454
2455 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2456 ERROR("Automatic gateway detection is only supported "
2457 "for veth and macvlan");
2458 return -1;
2459 }
2460
de4855a8 2461 if (netdev->link[0] == '\0') {
811ef482
CB
2462 ERROR("Automatic gateway detection needs a link interface");
2463 return -1;
2464 }
2465
2466 link_index = if_nametoindex(netdev->link);
2467 if (!link_index)
2468 return -EINVAL;
2469
2470 if (netdev->ipv4_gateway_auto) {
2471 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2472 ERROR("Failed to automatically find ipv4 gateway "
2473 "address from link interface \"%s\"", netdev->link);
2474 return -1;
2475 }
2476 }
2477
2478 if (netdev->ipv6_gateway_auto) {
2479 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2480 ERROR("Failed to automatically find ipv6 gateway "
2481 "address from link interface \"%s\"", netdev->link);
2482 return -1;
2483 }
2484 }
2485 }
2486
2487 return 0;
2488}
2489
2490#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2491static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2492 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2493{
2494 int ret;
2495 pid_t child;
2496 int bytes, pipefd[2];
2497 char *token, *saveptr = NULL;
095ead80 2498 char netdev_link[IFNAMSIZ];
419590da 2499 char buffer[PATH_MAX] = {0};
94b1cade 2500 size_t retlen;
811ef482
CB
2501
2502 if (netdev->type != LXC_NET_VETH) {
2503 ERROR("Network type %d not support for unprivileged use", netdev->type);
2504 return -1;
2505 }
2506
2507 ret = pipe(pipefd);
2508 if (ret < 0) {
2509 SYSERROR("Failed to create pipe");
2510 return -1;
2511 }
2512
2513 child = fork();
2514 if (child < 0) {
2515 SYSERROR("Failed to create new process");
2516 close(pipefd[0]);
2517 close(pipefd[1]);
2518 return -1;
2519 }
2520
2521 if (child == 0) {
8335fd40 2522 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2523
2524 close(pipefd[0]);
2525
2526 ret = dup2(pipefd[1], STDOUT_FILENO);
2527 if (ret >= 0)
2528 ret = dup2(pipefd[1], STDERR_FILENO);
2529 close(pipefd[1]);
2530 if (ret < 0) {
2531 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2532 _exit(EXIT_FAILURE);
811ef482
CB
2533 }
2534
de4855a8 2535 if (netdev->link[0] != '\0')
9de31d5a 2536 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2537 else
9de31d5a
CB
2538 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2539 if (retlen >= IFNAMSIZ) {
2540 SYSERROR("Invalid network device name");
2541 _exit(EXIT_FAILURE);
2542 }
811ef482 2543
8335fd40
CB
2544 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2545 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2546 _exit(EXIT_FAILURE);
8335fd40 2547 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2548
2549 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2550 lxcname, pidstr, netdev_link,
de4855a8
CB
2551 netdev->name[0] != '\0' ? netdev->name : "(null)");
2552 if (netdev->name[0] != '\0')
811ef482
CB
2553 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2554 lxcpath, lxcname, pidstr, "veth", netdev_link,
2555 netdev->name, (char *)NULL);
2556 else
2557 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2558 lxcpath, lxcname, pidstr, "veth", netdev_link,
2559 (char *)NULL);
2560 SYSERROR("Failed to execute lxc-user-nic");
78070056 2561 _exit(EXIT_FAILURE);
811ef482
CB
2562 }
2563
2564 /* close the write-end of the pipe */
2565 close(pipefd[1]);
2566
419590da 2567 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2568 if (bytes < 0) {
74c6e2b0 2569 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2570 close(pipefd[0]);
6b9f82a9
CB
2571 } else {
2572 buffer[bytes - 1] = '\0';
811ef482 2573 }
811ef482
CB
2574
2575 ret = wait_for_pid(child);
2576 close(pipefd[0]);
6b9f82a9 2577 if (ret != 0 || bytes < 0) {
811ef482
CB
2578 ERROR("lxc-user-nic failed to configure requested network: %s",
2579 buffer[0] != '\0' ? buffer : "(null)");
2580 return -1;
2581 }
2582 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2583
2584 /* netdev->name */
2585 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2586 if (!token) {
2587 ERROR("Failed to parse lxc-user-nic output");
811ef482 2588 return -1;
74c6e2b0 2589 }
811ef482 2590
e389f2af
CB
2591 /*
2592 * lxc-user-nic will take care of proper network device naming. So
2593 * netdev->name and netdev->created_name need to be identical to not
2594 * trigger another rename later on.
2595 */
2596 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2597 if (retlen < IFNAMSIZ)
2598 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2599 if (retlen >= IFNAMSIZ) {
2600 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2601 return -E2BIG;
2602 }
811ef482 2603
74c6e2b0 2604 /* netdev->ifindex */
811ef482 2605 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2606 if (!token) {
2607 ERROR("Failed to parse lxc-user-nic output");
811ef482 2608 return -1;
74c6e2b0 2609 }
811ef482 2610
74c6e2b0
CB
2611 ret = lxc_safe_int(token, &netdev->ifindex);
2612 if (ret < 0) {
6d1400b5 2613 errno = -ret;
2614 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2615 return -1;
2616 }
2617
74c6e2b0 2618 /* netdev->priv.veth_attr.veth1 */
811ef482 2619 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2620 if (!token) {
2621 ERROR("Failed to parse lxc-user-nic output");
811ef482 2622 return -1;
74c6e2b0 2623 }
811ef482 2624
94b1cade
DJ
2625 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2626 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2627 ERROR("Host side veth device name returned by lxc-user-nic is "
2628 "too long");
2629 return -E2BIG;
2630 }
74c6e2b0
CB
2631
2632 /* netdev->priv.veth_attr.ifindex */
2633 token = strtok_r(NULL, ":", &saveptr);
2634 if (!token) {
2635 ERROR("Failed to parse lxc-user-nic output");
2636 return -1;
2637 }
2638
2639 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2640 if (ret < 0) {
6d1400b5 2641 errno = -ret;
2642 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2643 return -1;
2644 }
2645
4d781681 2646 if (netdev->upscript) {
2647 char *argv[] = {
2648 "veth",
2649 netdev->link,
2650 netdev->priv.veth_attr.veth1,
2651 NULL,
2652 };
2653
e389f2af
CB
2654 ret = run_script_argv(lxcname, hooks_version, "net",
2655 netdev->upscript, "up", argv);
4d781681 2656 if (ret < 0)
2657 return -1;
2658 }
2659
811ef482
CB
2660 return 0;
2661}
2662
f0ecc19d 2663static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2664 struct lxc_netdev *netdev,
2665 const char *netns_path)
811ef482
CB
2666{
2667 int bytes, ret;
2668 pid_t child;
2669 int pipefd[2];
419590da 2670 char buffer[PATH_MAX] = {0};
811ef482
CB
2671
2672 if (netdev->type != LXC_NET_VETH) {
2673 ERROR("Network type %d not support for unprivileged use", netdev->type);
2674 return -1;
2675 }
2676
2677 ret = pipe(pipefd);
2678 if (ret < 0) {
2679 SYSERROR("Failed to create pipe");
2680 return -1;
2681 }
2682
2683 child = fork();
2684 if (child < 0) {
2685 SYSERROR("Failed to create new process");
2686 close(pipefd[0]);
2687 close(pipefd[1]);
2688 return -1;
2689 }
2690
2691 if (child == 0) {
8843fde4 2692 char *hostveth;
811ef482
CB
2693
2694 close(pipefd[0]);
2695
2696 ret = dup2(pipefd[1], STDOUT_FILENO);
2697 if (ret >= 0)
2698 ret = dup2(pipefd[1], STDERR_FILENO);
2699 close(pipefd[1]);
2700 if (ret < 0) {
2701 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2702 _exit(EXIT_FAILURE);
811ef482
CB
2703 }
2704
8843fde4
CB
2705 if (netdev->priv.veth_attr.pair[0] != '\0')
2706 hostveth = netdev->priv.veth_attr.pair;
2707 else
2708 hostveth = netdev->priv.veth_attr.veth1;
2709 if (hostveth[0] == '\0') {
74c6e2b0 2710 SYSERROR("Host side veth device name is missing");
a30b9023 2711 _exit(EXIT_FAILURE);
74c6e2b0
CB
2712 }
2713
de4855a8 2714 if (netdev->link[0] == '\0') {
811ef482 2715 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2716 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2717 _exit(EXIT_FAILURE);
74c6e2b0 2718 }
811ef482 2719
811ef482 2720 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2721 lxcname, netns_path, netdev->link, hostveth);
811ef482 2722 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2723 lxcname, netns_path, "veth", netdev->link, hostveth,
2724 (char *)NULL);
811ef482 2725 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2726 _exit(EXIT_FAILURE);
811ef482
CB
2727 }
2728
2729 close(pipefd[1]);
2730
419590da 2731 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2732 if (bytes < 0) {
2733 SYSERROR("Failed to read from pipe file descriptor.");
2734 close(pipefd[0]);
6b9f82a9
CB
2735 } else {
2736 buffer[bytes - 1] = '\0';
811ef482 2737 }
811ef482 2738
6b9f82a9
CB
2739 ret = wait_for_pid(child);
2740 close(pipefd[0]);
2741 if (ret != 0 || bytes < 0) {
811ef482
CB
2742 ERROR("lxc-user-nic failed to delete requested network: %s",
2743 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2744 return -1;
2745 }
2746
811ef482
CB
2747 return 0;
2748}
2749
1bd8d726
CB
2750bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2751{
2752 int ret;
2753 struct lxc_list *iterator;
2754 struct lxc_list *network = &handler->conf->network;
2755 /* strlen("/proc/") = 6
2756 * +
8335fd40 2757 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2758 * +
2759 * strlen("/fd/") = 4
2760 * +
8335fd40 2761 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2762 * +
2763 * \0
2764 */
8335fd40 2765 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2766
2767 *netns_path = '\0';
2768
28d9e29e 2769 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2770 DEBUG("Cannot not guarantee safe deletion of network devices. "
2771 "Manual cleanup maybe needed");
2772 return false;
2773 }
2774
2775 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2776 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2777 if (ret < 0 || ret >= sizeof(netns_path))
2778 return false;
2779
2780 lxc_list_for_each(iterator, network) {
2781 char *hostveth = NULL;
2782 struct lxc_netdev *netdev = iterator->elem;
2783
2784 /* We can only delete devices whose ifindex we have. If we don't
2785 * have the index it means that we didn't create it.
2786 */
2787 if (!netdev->ifindex)
2788 continue;
2789
2790 if (netdev->type == LXC_NET_PHYS) {
2791 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2792 netdev->link);
2793 if (ret < 0)
2794 WARN("Failed to rename interface with index %d "
2795 "to its initial name \"%s\"",
2796 netdev->ifindex, netdev->link);
2797 else
2798 TRACE("Renamed interface with index %d to its "
2799 "initial name \"%s\"",
2800 netdev->ifindex, netdev->link);
b3259dc6
TP
2801
2802 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2803 goto clear_ifindices;
1bd8d726
CB
2804 }
2805
2806 ret = netdev_deconf[netdev->type](handler, netdev);
2807 if (ret < 0)
2808 WARN("Failed to deconfigure network device");
2809
2810 if (netdev->type != LXC_NET_VETH)
66a7c406 2811 goto clear_ifindices;
1bd8d726 2812
c869be20 2813 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2814 goto clear_ifindices;
1bd8d726 2815
8843fde4
CB
2816 if (netdev->priv.veth_attr.pair[0] != '\0')
2817 hostveth = netdev->priv.veth_attr.pair;
2818 else
2819 hostveth = netdev->priv.veth_attr.veth1;
2820 if (hostveth[0] == '\0')
66a7c406 2821 goto clear_ifindices;
8843fde4 2822
1bd8d726
CB
2823 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2824 handler->name, netdev,
2825 netns_path);
2826 if (ret < 0) {
1bd8d726 2827 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2828 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2829 goto clear_ifindices;
1bd8d726
CB
2830 }
2831 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2832 netdev->link);
66a7c406
CB
2833
2834clear_ifindices:
ad2ddfcd 2835 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
2836 * have cached stale data which would cause it to fail on reboot
2837 * we're we don't re-read the on-disk config file.
2838 */
2839 netdev->ifindex = 0;
2840 if (netdev->type == LXC_NET_PHYS) {
2841 netdev->priv.phys_attr.ifindex = 0;
2842 } else if (netdev->type == LXC_NET_VETH) {
2843 netdev->priv.veth_attr.veth1[0] = '\0';
2844 netdev->priv.veth_attr.ifindex = 0;
2845 }
1bd8d726
CB
2846 }
2847
bb84beda 2848 return true;
1bd8d726
CB
2849}
2850
/* Argument bundle handed to the "ip neigh ... proxy" exec callbacks through
 * their void pointer parameter.
 */
struct ip_proxy_args {
	/* Textual IP address placed after "proxy" on the command line. */
	const char *ip;
	/* Device name placed after "dev" on the command line. */
	const char *dev;
};
2855
2856static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2857{
2858 struct ip_proxy_args *args = data;
2859
2860 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2861 return -1;
2862}
2863
2864static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2865{
2866 struct ip_proxy_args *args = data;
2867
2868 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2869 return -1;
2870}
2871
2872static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2873{
2874 int ret;
2875 char cmd_output[PATH_MAX];
2876 struct ip_proxy_args args = {
2877 .ip = ip,
2878 .dev = dev,
2879 };
2880
2881 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2882 if (ret < 0) {
2883 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2884 return -1;
2885 }
2886
2887 return 0;
2888}
2889
2890static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2891{
2892 int ret;
2893 char cmd_output[PATH_MAX];
2894 struct ip_proxy_args args = {
2895 .ip = ip,
2896 .dev = dev,
2897 };
2898
2899 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2900 if (ret < 0) {
2901 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2902 return -1;
2903 }
2904
2905 return 0;
2906}
2907
2908static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2909 struct lxc_list *cur, *next;
2910 struct lxc_inetdev *inet4dev;
2911 struct lxc_inet6dev *inet6dev;
2912 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2913 int err = 0;
2914 unsigned int lo_ifindex = 0;
6509154d 2915
2916 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2917 if (!lxc_list_empty(&netdev->ipv4)) {
2918 /* Check for net.ipv4.conf.[link].forwarding=1 */
2919 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2920 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2921 return minus_one_set_errno(EINVAL);
2922 }
2923 }
2924
2925 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2926 if (!lxc_list_empty(&netdev->ipv6)) {
2927 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2928 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2929 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2930 return minus_one_set_errno(EINVAL);
2931 }
2932
2933 /* Check for net.ipv6.conf.[link].forwarding=1 */
2934 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2935 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2936 return minus_one_set_errno(EINVAL);
2937 }
2938 }
2939
b670016a 2940 /* Perform IPVLAN specific checks. */
2941 if (netdev->type == LXC_NET_IPVLAN) {
2942 /* Check mode is l3s as other modes do not work with l2proxy. */
2943 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2944 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2945 return minus_one_set_errno(EINVAL);
2946 }
2947
2948 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
2949 lo_ifindex = if_nametoindex(loDev);
2950 if (lo_ifindex == 0) {
2951 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loDev);
2952 return minus_one_set_errno(EINVAL);
2953 }
2954 }
2955
6509154d 2956 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2957 inet4dev = cur->elem;
2958 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2959 return minus_one_set_errno(-errno);
2960
2961 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2962 return minus_one_set_errno(EINVAL);
b670016a 2963
2964 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2965 if (netdev->type == LXC_NET_IPVLAN) {
2966 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2967 if (err < 0) {
2968 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loDev);
2969 return minus_one_set_errno(-err);
2970 }
2971 }
6509154d 2972 }
2973
2974 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2975 inet6dev = cur->elem;
2976 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2977 return minus_one_set_errno(-errno);
2978
2979 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2980 return minus_one_set_errno(EINVAL);
b670016a 2981
2982 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2983 if (netdev->type == LXC_NET_IPVLAN) {
2984 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2985 if (err < 0) {
2986 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loDev);
2987 return minus_one_set_errno(-err);
2988 }
2989 }
6509154d 2990 }
2991
2992 return 0;
2993}
2994
/* Undo the l2proxy setup for a single IPv4 address.
 *
 * When @lo_ifindex is non-zero the /32 route to that interface is deleted;
 * when @link is non-empty the neighbour proxy entry for @ip on @link is
 * removed. Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL if the address cannot
 * be converted to text or any removal step failed.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
	char addrstr[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Remove the static route to the loopback device, if one was given. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Remove the neighbour proxy entry on the link, if one was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3023
/* Undo the l2proxy setup for a single IPv6 address.
 *
 * When @lo_ifindex is non-zero the /128 route to that interface is deleted;
 * when @link is non-empty the neighbour proxy entry for @ip on @link is
 * removed. Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL if the address cannot
 * be converted to text or any removal step failed.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
	char addrstr[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Remove the static route to the loopback device, if one was given. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Remove the neighbour proxy entry on the link, if one was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3052
6509154d 3053static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3054 unsigned int lo_ifindex = 0;
3055 unsigned int errCount = 0;
6509154d 3056 struct lxc_list *cur, *next;
3057 struct lxc_inetdev *inet4dev;
3058 struct lxc_inet6dev *inet6dev;
6509154d 3059
b670016a 3060 /* Perform IPVLAN specific checks. */
3061 if (netdev->type == LXC_NET_IPVLAN) {
3062 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3063 lo_ifindex = if_nametoindex(loDev);
3064 if (lo_ifindex == 0) {
3065 errCount++;
3066 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loDev);
6509154d 3067 }
b670016a 3068 }
6509154d 3069
b670016a 3070 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3071 inet4dev = cur->elem;
3072 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3073 errCount++;
6509154d 3074 }
3075
3076 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3077 inet6dev = cur->elem;
b670016a 3078 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3079 errCount++;
6509154d 3080 }
3081
b670016a 3082 if (errCount > 0)
6509154d 3083 return minus_one_set_errno(EINVAL);
3084
3085 return 0;
3086}
3087
e389f2af 3088static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3089{
811ef482
CB
3090 struct lxc_list *iterator;
3091 struct lxc_list *network = &handler->conf->network;
3092
811ef482
CB
3093 lxc_list_for_each(iterator, network) {
3094 struct lxc_netdev *netdev = iterator->elem;
3095
3096 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3097 ERROR("Invalid network configuration type %d", netdev->type);
3098 return -1;
3099 }
3100
6509154d 3101 /* Setup l2proxy entries if enabled and used with a link property */
3102 if (netdev->l2proxy && netdev->link[0] != '\0') {
3103 if (lxc_setup_l2proxy(netdev)) {
3104 ERROR("Failed to setup l2proxy");
3105 return -1;
3106 }
3107 }
3108
811ef482
CB
3109 if (netdev_conf[netdev->type](handler, netdev)) {
3110 ERROR("Failed to create network device");
3111 return -1;
3112 }
811ef482
CB
3113 }
3114
3115 return 0;
3116}
3117
e389f2af 3118int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3119{
e389f2af
CB
3120 pid_t pid = handler->pid;
3121 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3122 struct lxc_list *iterator;
3123
e0010464 3124 if (am_guest_unpriv())
74c6e2b0 3125 return 0;
811ef482
CB
3126
3127 lxc_list_for_each(iterator, network) {
e389f2af
CB
3128 int ret;
3129 char ifname[IFNAMSIZ];
811ef482
CB
3130 struct lxc_netdev *netdev = iterator->elem;
3131
811ef482
CB
3132 if (!netdev->ifindex)
3133 continue;
3134
3135 /* retrieve the name of the interface */
3136 if (!if_indextoname(netdev->ifindex, ifname)) {
3137 ERROR("No interface corresponding to ifindex \"%d\"",
3138 netdev->ifindex);
3139 return -1;
3140 }
3141
535e8859
CB
3142 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3143 if (ret) {
6d1400b5 3144 errno = -ret;
e389f2af
CB
3145 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3146 ifname, pid);
811ef482
CB
3147 return -1;
3148 }
3149
e389f2af
CB
3150 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3151
3152 DEBUG("Moved network device \"%s\" to network namespace of %d",
3153 netdev->created_name, pid);
811ef482
CB
3154 }
3155
3156 return 0;
3157}
3158
e389f2af 3159static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3160{
e389f2af
CB
3161 int hooks_version = handler->conf->hooks_version;
3162 const char *lxcname = handler->name;
3163 const char *lxcpath = handler->lxcpath;
3164 struct lxc_list *network = &handler->conf->network;
3165 pid_t pid = handler->pid;
74c6e2b0
CB
3166 struct lxc_list *iterator;
3167
74c6e2b0
CB
3168 lxc_list_for_each(iterator, network) {
3169 struct lxc_netdev *netdev = iterator->elem;
3170
3171 if (netdev->type == LXC_NET_EMPTY)
3172 continue;
3173
3174 if (netdev->type == LXC_NET_NONE)
3175 continue;
3176
3177 if (netdev->type != LXC_NET_VETH) {
e389f2af 3178 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3179 lxc_net_type_to_str(netdev->type));
3180 return -1;
3181 }
3182
3183 if (netdev->mtu)
3184 INFO("mtu ignored due to insufficient privilege");
3185
e389f2af
CB
3186 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3187 pid, hooks_version))
74c6e2b0
CB
3188 return -1;
3189 }
3190
3191 return 0;
3192}
3193
1bd8d726 3194bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3195{
3196 int ret;
3197 struct lxc_list *iterator;
3198 struct lxc_list *network = &handler->conf->network;
1bd8d726 3199
811ef482
CB
3200 lxc_list_for_each(iterator, network) {
3201 char *hostveth = NULL;
3202 struct lxc_netdev *netdev = iterator->elem;
3203
3204 /* We can only delete devices whose ifindex we have. If we don't
3205 * have the index it means that we didn't create it.
3206 */
3207 if (!netdev->ifindex)
3208 continue;
3209
6509154d 3210 /* Delete l2proxy entries if enabled and used with a link property */
3211 if (netdev->l2proxy && netdev->link[0] != '\0') {
3212 if (lxc_delete_l2proxy(netdev))
3213 WARN("Failed to delete all l2proxy config");
3214 /* Don't return, let the network be cleaned up as normal. */
3215 }
3216
811ef482
CB
3217 if (netdev->type == LXC_NET_PHYS) {
3218 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3219 if (ret < 0)
3220 WARN("Failed to rename interface with index %d "
b809f232
CB
3221 "from \"%s\" to its initial name \"%s\"",
3222 netdev->ifindex, netdev->name, netdev->link);
0b154989 3223 else {
29589196
CB
3224 TRACE("Renamed interface with index %d from "
3225 "\"%s\" to its initial name \"%s\"",
3226 netdev->ifindex, netdev->name,
3227 netdev->link);
0b154989
TP
3228
3229 /* Restore original MTU */
3230 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3231 if (ret < 0) {
3232 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3233 netdev->link, netdev->priv.phys_attr.mtu);
3234 } else {
3235 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3236 netdev->link, netdev->priv.phys_attr.mtu);
3237 }
3238 }
b3259dc6
TP
3239
3240 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3241 goto clear_ifindices;
811ef482
CB
3242 }
3243
3244 ret = netdev_deconf[netdev->type](handler, netdev);
3245 if (ret < 0)
3246 WARN("Failed to deconfigure network device");
3247
3248 /* Recent kernels remove the virtual interfaces when the network
3249 * namespace is destroyed but in case we did not move the
3250 * interface to the network namespace, we have to destroy it.
3251 */
1bd8d726 3252 ret = lxc_netdev_delete_by_index(netdev->ifindex);
78ab281c
CB
3253 if (ret < 0) {
3254 if (errno != ENODEV) {
3255 WARN("Failed to remove interface \"%s\" with index %d",
3256 netdev->name[0] != '\0' ? netdev->name : "(null)",
3257 netdev->ifindex);
3258 goto clear_ifindices;
3259 }
3260 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
24548539
CB
3261 netdev->name[0] != '\0' ? netdev->name : "(null)",
3262 netdev->ifindex);
811ef482 3263 }
1bd8d726 3264 INFO("Removed interface \"%s\" with index %d",
52845118
CB
3265 netdev->name[0] != '\0' ? netdev->name : "(null)",
3266 netdev->ifindex);
811ef482
CB
3267
3268 if (netdev->type != LXC_NET_VETH)
66a7c406 3269 goto clear_ifindices;
811ef482 3270
811ef482
CB
3271 /* Explicitly delete host veth device to prevent lingering
3272 * devices. We had issues in LXD around this.
3273 */
de4855a8 3274 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3275 hostveth = netdev->priv.veth_attr.pair;
3276 else
3277 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3278 if (hostveth[0] == '\0')
66a7c406 3279 goto clear_ifindices;
811ef482
CB
3280
3281 ret = lxc_netdev_delete_by_name(hostveth);
3282 if (ret < 0) {
24548539
CB
3283 WARN("Failed to remove interface \"%s\" from \"%s\"",
3284 hostveth, netdev->link);
66a7c406 3285 goto clear_ifindices;
811ef482
CB
3286 }
3287 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3288
c869be20 3289 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3290 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3291 netdev->ifindex = 0;
3292 netdev->priv.veth_attr.ifindex = 0;
3293 goto clear_ifindices;
811ef482
CB
3294 }
3295
3296 /* Delete the openvswitch port. */
3297 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3298 if (ret < 0)
3299 WARN("Failed to remove port \"%s\" from openvswitch "
3300 "bridge \"%s\"", hostveth, netdev->link);
3301 else
3302 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3303 hostveth, netdev->link);
3304
66a7c406 3305clear_ifindices:
ad2ddfcd 3306 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3307 * have cached stale data which would cause it to fail on reboot
3308 * where we don't re-read the on-disk config file.
3309 */
3310 netdev->ifindex = 0;
3311 if (netdev->type == LXC_NET_PHYS) {
3312 netdev->priv.phys_attr.ifindex = 0;
3313 } else if (netdev->type == LXC_NET_VETH) {
3314 netdev->priv.veth_attr.veth1[0] = '\0';
3315 netdev->priv.veth_attr.ifindex = 0;
3316 }
811ef482
CB
3317 }
3318
bb84beda 3319 return true;
811ef482
CB
3320}
3321
3322int lxc_requests_empty_network(struct lxc_handler *handler)
3323{
3324 struct lxc_list *network = &handler->conf->network;
3325 struct lxc_list *iterator;
3326 bool found_none = false, found_nic = false;
3327
3328 if (lxc_list_empty(network))
3329 return 0;
3330
3331 lxc_list_for_each(iterator, network) {
3332 struct lxc_netdev *netdev = iterator->elem;
3333
3334 if (netdev->type == LXC_NET_NONE)
3335 found_none = true;
3336 else
3337 found_nic = true;
3338 }
3339 if (found_none && !found_nic)
3340 return 1;
3341 return 0;
3342}
3343
3344/* try to move physical nics to the init netns */
b809f232 3345int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3346{
3347 int ret;
b809f232 3348 int oldfd;
811ef482 3349 char ifname[IFNAMSIZ];
b809f232 3350 struct lxc_list *iterator;
28d9e29e 3351 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3352 struct lxc_conf *conf = handler->conf;
811ef482 3353
b809f232
CB
3354 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3355 * the parent network namespace. We won't have this capability if we are
3356 * unprivileged.
3357 */
d0fbc7ba 3358 if (!handler->am_root)
b809f232 3359 return 0;
811ef482 3360
b809f232 3361 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3362
0037ab49 3363 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3364 if (oldfd < 0) {
3365 SYSERROR("Failed to preserve network namespace");
b809f232 3366 return -1;
811ef482
CB
3367 }
3368
b809f232 3369 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3370 if (ret < 0) {
3371 SYSERROR("Failed to enter network namespace");
3372 close(oldfd);
b809f232 3373 return -1;
811ef482
CB
3374 }
3375
b809f232
CB
3376 lxc_list_for_each(iterator, &conf->network) {
3377 struct lxc_netdev *netdev = iterator->elem;
811ef482 3378
b809f232
CB
3379 if (netdev->type != LXC_NET_PHYS)
3380 continue;
3381
3382 /* Retrieve the name of the interface in the container's network
3383 * namespace.
3384 */
3385 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3386 WARN("No interface corresponding to ifindex %d",
b809f232 3387 netdev->ifindex);
811ef482
CB
3388 continue;
3389 }
b809f232 3390
0037ab49 3391 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3392 if (ret < 0)
811ef482
CB
3393 WARN("Error moving network device \"%s\" back to "
3394 "network namespace", ifname);
b809f232
CB
3395 else
3396 TRACE("Moved network device \"%s\" back to network "
3397 "namespace", ifname);
811ef482 3398 }
811ef482 3399
b809f232 3400 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3401 close(oldfd);
b809f232
CB
3402 if (ret < 0) {
3403 SYSERROR("Failed to enter network namespace");
3404 return -1;
3405 }
3406
3407 return 0;
811ef482
CB
3408}
3409
3410static int setup_hw_addr(char *hwaddr, const char *ifname)
3411{
3412 struct sockaddr sockaddr;
3413 struct ifreq ifr;
6d1400b5 3414 int ret, fd;
811ef482
CB
3415
3416 ret = lxc_convert_mac(hwaddr, &sockaddr);
3417 if (ret) {
6d1400b5 3418 errno = -ret;
3419 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3420 return -1;
3421 }
3422
3423 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3424 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3425 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3426
ad9429e5 3427 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3428 if (fd < 0)
3429 return -1;
3430
3431 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3432 if (ret)
6d1400b5 3433 SYSERROR("Failed to perform ioctl");
3434
3435 close(fd);
811ef482
CB
3436
3437 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3438 ifr.ifr_name);
3439
3440 return ret;
3441}
3442
3443static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3444{
3445 struct lxc_list *iterator;
3446 int err;
3447
3448 lxc_list_for_each(iterator, ip) {
3449 struct lxc_inetdev *inetdev = iterator->elem;
3450
3451 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3452 &inetdev->bcast, inetdev->prefix);
3453 if (err) {
6d1400b5 3454 errno = -err;
3455 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3456 "with ifindex %d", ifindex);
811ef482
CB
3457 return -1;
3458 }
3459 }
3460
3461 return 0;
3462}
3463
3464static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3465{
3466 struct lxc_list *iterator;
3467 int err;
3468
3469 lxc_list_for_each(iterator, ip) {
3470 struct lxc_inet6dev *inet6dev = iterator->elem;
3471
3472 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3473 &inet6dev->mcast, &inet6dev->acast,
3474 inet6dev->prefix);
3475 if (err) {
6d1400b5 3476 errno = -err;
3477 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3478 "with ifindex %d", ifindex);
811ef482
CB
3479 return -1;
3480 }
3481 }
3482
3483 return 0;
3484}
3485
3486static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3487{
3488 char ifname[IFNAMSIZ];
3489 int err;
811ef482 3490 char *current_ifname = ifname;
009d6127 3491 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3492
3493 /* empty network namespace */
3494 if (!netdev->ifindex) {
3495 if (netdev->flags & IFF_UP) {
3496 err = lxc_netdev_up("lo");
3497 if (err) {
6d1400b5 3498 errno = -err;
3499 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3500 return -1;
3501 }
3502 }
3503
3504 if (netdev->type == LXC_NET_EMPTY)
3505 return 0;
3506
3507 if (netdev->type == LXC_NET_NONE)
3508 return 0;
3509
e389f2af
CB
3510 netdev->ifindex = if_nametoindex(netdev->created_name);
3511 if (!netdev->ifindex)
3512 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3513 netdev->name ?: "(null)");
811ef482
CB
3514 }
3515
3516 /* get the new ifindex in case of physical netdev */
3517 if (netdev->type == LXC_NET_PHYS) {
3518 netdev->ifindex = if_nametoindex(netdev->link);
3519 if (!netdev->ifindex) {
3520 ERROR("Failed to get ifindex for network device \"%s\"",
3521 netdev->link);
3522 return -1;
3523 }
3524 }
3525
3526 /* retrieve the name of the interface */
3527 if (!if_indextoname(netdev->ifindex, current_ifname)) {
e389f2af
CB
3528 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3529 netdev->ifindex);
811ef482
CB
3530 return -1;
3531 }
3532
e389f2af 3533 /* Default: let the system choose an interface name.
811ef482
CB
3534 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3535 * netlink will replace the format specifier with an appropriate index.
3536 */
de4855a8
CB
3537 if (netdev->name[0] == '\0') {
3538 if (netdev->type == LXC_NET_PHYS)
94b1cade 3539 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3540 else
94b1cade 3541 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3542 }
811ef482
CB
3543
3544 /* rename the interface name */
e389f2af
CB
3545 if (strcmp(current_ifname, netdev->name) != 0) {
3546 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
811ef482 3547 if (err) {
6d1400b5 3548 errno = -err;
3549 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
e389f2af 3550 current_ifname, netdev->name);
811ef482
CB
3551 return -1;
3552 }
e389f2af
CB
3553
3554 TRACE("Renamed network device from \"%s\" to \"%s\"",
3555 current_ifname, netdev->name);
811ef482
CB
3556 }
3557
3558 /* Re-read the name of the interface because its name has changed
3559 * and would be automatically allocated by the system
3560 */
3561 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3562 ERROR("Failed get name for network device with ifindex %d",
3563 netdev->ifindex);
3564 return -1;
3565 }
3566
790255cf
CB
3567 /* Now update the recorded name of the network device to reflect the
3568 * name of the network device in the child's network namespace. We will
3569 * later on send this information back to the parent.
3570 */
94b1cade 3571 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3572
811ef482
CB
3573 /* set a mac address */
3574 if (netdev->hwaddr) {
3575 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3576 ERROR("Failed to setup hw address for network device \"%s\"",
3577 current_ifname);
3578 return -1;
3579 }
3580 }
3581
3582 /* setup ipv4 addresses on the interface */
3583 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3584 ERROR("Failed to setup ip addresses for network device \"%s\"",
e389f2af 3585 current_ifname);
811ef482
CB
3586 return -1;
3587 }
3588
3589 /* setup ipv6 addresses on the interface */
3590 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3591 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
e389f2af 3592 current_ifname);
811ef482
CB
3593 return -1;
3594 }
3595
3596 /* set the network device up */
3597 if (netdev->flags & IFF_UP) {
811ef482
CB
3598 err = lxc_netdev_up(current_ifname);
3599 if (err) {
6d1400b5 3600 errno = -err;
3601 SYSERROR("Failed to set network device \"%s\" up",
3602 current_ifname);
811ef482
CB
3603 return -1;
3604 }
3605
3606 /* the network is up, make the loopback up too */
3607 err = lxc_netdev_up("lo");
3608 if (err) {
6d1400b5 3609 errno = -err;
3610 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3611 return -1;
3612 }
3613 }
3614
811ef482 3615 /* setup ipv4 gateway on the interface */
a2f9a670 3616 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3617 if (!(netdev->flags & IFF_UP)) {
3618 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3619 "\"%s\" when not bringing up the interface", current_ifname);
811ef482
CB
3620 return -1;
3621 }
3622
3623 if (lxc_list_empty(&netdev->ipv4)) {
3624 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3625 "\"%s\" when not assigning an address", current_ifname);
811ef482
CB
3626 return -1;
3627 }
3628
a2f9a670 3629 /* Setup device route if ipv4_gateway_dev is enabled */
3630 if (netdev->ipv4_gateway_dev) {
3631 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3632 if (err < 0) {
3633 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
e389f2af 3634 current_ifname);
a2f9a670 3635 return minus_one_set_errno(-err);
811ef482 3636 }
a2f9a670 3637 } else {
009d6127 3638 /* Check the gateway address is valid */
3639 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3640 return minus_one_set_errno(errno);
3641
3642 /* Try adding a default route to the gateway address */
811ef482 3643 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3644 if (err < 0) {
3645 /* If adding the default route fails, this could be because the
3646 * gateway address is in a different subnet to the container's address.
3647 * To work around this, we try adding a static device route to the
3648 * gateway address first, and then try again.
3649 */
a2f9a670 3650 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3651 if (err < 0) {
a2f9a670 3652 errno = -err;
009d6127 3653 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
e389f2af 3654 bufinet4, current_ifname);
009d6127 3655 return -1;
a2f9a670 3656 }
6d1400b5 3657
a2f9a670 3658 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3659 if (err < 0) {
a2f9a670 3660 errno = -err;
009d6127 3661 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
e389f2af 3662 bufinet4, current_ifname);
a2f9a670 3663 return -1;
811ef482 3664 }
811ef482
CB
3665 }
3666 }
3667 }
3668
3669 /* setup ipv6 gateway on the interface */
a2f9a670 3670 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3671 if (!(netdev->flags & IFF_UP)) {
e389f2af
CB
3672 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3673 current_ifname);
811ef482
CB
3674 return -1;
3675 }
3676
3677 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af
CB
3678 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3679 current_ifname);
811ef482
CB
3680 return -1;
3681 }
3682
a2f9a670 3683 /* Setup device route if ipv6_gateway_dev is enabled */
3684 if (netdev->ipv6_gateway_dev) {
3685 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3686 if (err < 0) {
3687 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
e389f2af 3688 current_ifname);
a2f9a670 3689 return minus_one_set_errno(-err);
811ef482 3690 }
a2f9a670 3691 } else {
009d6127 3692 /* Check the gateway address is valid */
3693 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3694 return minus_one_set_errno(errno);
3695
3696 /* Try adding a default route to the gateway address */
811ef482 3697 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3698 if (err < 0) {
3699 /* If adding the default route fails, this could be because the
3700 * gateway address is in a different subnet to the container's address.
3701 * To work around this, we try adding a static device route to the
3702 * gateway address first, and then try again.
3703 */
a2f9a670 3704 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3705 if (err < 0) {
a2f9a670 3706 errno = -err;
009d6127 3707 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
e389f2af 3708 bufinet6, current_ifname);
009d6127 3709 return -1;
a2f9a670 3710 }
6d1400b5 3711
a2f9a670 3712 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3713 if (err < 0) {
a2f9a670 3714 errno = -err;
009d6127 3715 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
e389f2af 3716 bufinet6, current_ifname);
a2f9a670 3717 return -1;
811ef482 3718 }
811ef482
CB
3719 }
3720 }
3721 }
3722
74c6e2b0 3723 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3724
3725 return 0;
3726}
3727
3728int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3729 struct lxc_list *network)
3730{
3731 struct lxc_list *iterator;
811ef482 3732
811ef482 3733 lxc_list_for_each(iterator, network) {
e389f2af 3734 struct lxc_netdev *netdev = iterator->elem;
811ef482 3735
811ef482 3736 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
e389f2af 3737 ERROR("Failed to setup netdev");
811ef482
CB
3738 return -1;
3739 }
3740 }
3741
3742 if (!lxc_list_empty(network))
e389f2af 3743 INFO("Network has been setup");
811ef482
CB
3744
3745 return 0;
3746}
7ab1ba02
CB
3747
3748int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3749{
3750 struct lxc_list *iterator;
3751 struct lxc_list *network = &handler->conf->network;
3752 int data_sock = handler->data_sock[0];
3753
7ab1ba02
CB
3754 lxc_list_for_each(iterator, network) {
3755 int ret;
3756 struct lxc_netdev *netdev = iterator->elem;
3757
3758 if (netdev->type != LXC_NET_VETH)
3759 continue;
3760
7fbb15ec 3761 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3762 if (ret < 0)
7ab1ba02 3763 return -1;
e389f2af
CB
3764
3765 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3766 if (ret < 0)
3767 return -1;
3768
3769 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3770 }
3771
3772 return 0;
3773}
3774
3775int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3776{
3777 struct lxc_list *iterator;
3778 struct lxc_list *network = &handler->conf->network;
3779 int data_sock = handler->data_sock[1];
3780
7ab1ba02
CB
3781 lxc_list_for_each(iterator, network) {
3782 int ret;
3783 struct lxc_netdev *netdev = iterator->elem;
3784
3785 if (netdev->type != LXC_NET_VETH)
3786 continue;
3787
e3233f26 3788 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3789 if (ret < 0)
7ab1ba02 3790 return -1;
e389f2af
CB
3791
3792 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3793 if (ret < 0)
3794 return -1;
3795 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3796 }
3797
3798 return 0;
3799}
a1ae535a
CB
3800
3801int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3802{
3803 struct lxc_list *iterator, *network;
3804 int data_sock = handler->data_sock[0];
3805
3806 if (!handler->am_root)
3807 return 0;
3808
3809 network = &handler->conf->network;
3810 lxc_list_for_each(iterator, network) {
3811 int ret;
3812 struct lxc_netdev *netdev = iterator->elem;
3813
3814 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3815 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3816 if (ret < 0)
7729f8e5 3817 return -1;
a1ae535a
CB
3818
3819 /* Send network device ifindex in the child's namespace to
3820 * parent.
3821 */
7fbb15ec 3822 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3823 if (ret < 0)
7729f8e5 3824 return -1;
a1ae535a
CB
3825 }
3826
e389f2af
CB
3827 if (!lxc_list_empty(network))
3828 TRACE("Sent network device names and ifindices to parent");
3829
a1ae535a 3830 return 0;
a1ae535a
CB
3831}
3832
3833int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3834{
3835 struct lxc_list *iterator, *network;
3836 int data_sock = handler->data_sock[1];
3837
3838 if (!handler->am_root)
3839 return 0;
3840
3841 network = &handler->conf->network;
3842 lxc_list_for_each(iterator, network) {
3843 int ret;
3844 struct lxc_netdev *netdev = iterator->elem;
3845
3846 /* Receive network device name in the child's namespace to
3847 * parent.
3848 */
e3233f26 3849 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3850 if (ret < 0)
7729f8e5 3851 return -1;
a1ae535a
CB
3852
3853 /* Receive network device ifindex in the child's namespace to
3854 * parent.
3855 */
e3233f26 3856 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3857 if (ret < 0)
7729f8e5 3858 return -1;
a1ae535a
CB
3859 }
3860
3861 return 0;
a1ae535a 3862}
bb84beda
CB
3863
3864void lxc_delete_network(struct lxc_handler *handler)
3865{
3866 bool bret;
3867
3868 if (handler->am_root)
3869 bret = lxc_delete_network_priv(handler);
3870 else
3871 bret = lxc_delete_network_unpriv(handler);
3872 if (!bret)
3873 DEBUG("Failed to delete network devices");
3874 else
3875 DEBUG("Deleted network devices");
3876}
1cd95214 3877
1cd95214
CB
3878int lxc_netns_set_nsid(int fd)
3879{
41a3300d 3880 int ret;
0ce60f0d
CB
3881 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3882 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3883 NLMSG_ALIGN(1024)];
1cd95214 3884 struct nl_handler nlh;
0ce60f0d
CB
3885 struct nlmsghdr *hdr;
3886 struct rtgenmsg *msg;
bfcedc7e 3887 int saved_errno;
9d036caa
CB
3888 const __s32 ns_id = -1;
3889 const __u32 netns_fd = fd;
1cd95214
CB
3890
3891 ret = netlink_open(&nlh, NETLINK_ROUTE);
3892 if (ret < 0)
41a3300d 3893 return -1;
1cd95214 3894
0ce60f0d 3895 memset(buf, 0, sizeof(buf));
6ce39620
CB
3896
3897#pragma GCC diagnostic push
3898#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3899 hdr = (struct nlmsghdr *)buf;
3900 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3901#pragma GCC diagnostic pop
1cd95214 3902
0ce60f0d
CB
3903 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3904 hdr->nlmsg_type = RTM_NEWNSID;
3905 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3906 hdr->nlmsg_pid = 0;
3907 hdr->nlmsg_seq = RTM_NEWNSID;
3908 msg->rtgen_family = AF_UNSPEC;
1cd95214 3909
9d036caa
CB
3910 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3911 if (ret < 0)
3912 goto on_error;
3913
3914 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3915 if (ret < 0)
3916 goto on_error;
1cd95214 3917
9fbbc427 3918 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3919
3920on_error:
bfcedc7e 3921 saved_errno = errno;
1cd95214 3922 netlink_close(&nlh);
bfcedc7e 3923 errno = saved_errno;
1cd95214 3924
9d036caa 3925 return ret;
1cd95214 3926}
938980ba
CB
3927
/* Index a sequence of routing attributes by type.
 *
 * @tb must have room for @max + 1 entries; it is cleared first. Walking the
 * @len bytes starting at @rta, the first attribute seen for each type
 * <= @max is stored in tb[type]; later duplicates and types above @max are
 * ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short attr_type = rta->rta_type;

		/* Out of range, or this slot is already taken. */
		if (attr_type > max || tb[attr_type])
			continue;

		tb[attr_type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
3947
/* Return the payload of routing attribute @rta interpreted as a signed
 * 32-bit integer in host byte order.
 *
 * The value is copied out with memcpy() instead of being read through a
 * casted pointer, so the read does not depend on the payload's alignment or
 * on the effective type of the underlying buffer. The caller must make sure
 * the attribute carries at least sizeof(__s32) bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3952
/* Given a pointer @r to a struct rtgenmsg, yield a pointer to the data that
 * follows it at netlink alignment, typed as the first routing attribute.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3957
3958int lxc_netns_get_nsid(int fd)
3959{
3960 int ret;
3961 ssize_t len;
3962 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3963 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3964 NLMSG_ALIGN(1024)];
938980ba
CB
3965 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3966 struct nl_handler nlh;
3967 struct nlmsghdr *hdr;
3968 struct rtgenmsg *msg;
3969 int saved_errno;
3970 __u32 netns_fd = fd;
3971
3972 ret = netlink_open(&nlh, NETLINK_ROUTE);
3973 if (ret < 0)
3974 return -1;
3975
3976 memset(buf, 0, sizeof(buf));
6ce39620
CB
3977
3978#pragma GCC diagnostic push
3979#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3980 hdr = (struct nlmsghdr *)buf;
3981 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3982#pragma GCC diagnostic pop
938980ba
CB
3983
3984 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3985 hdr->nlmsg_type = RTM_GETNSID;
3986 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3987 hdr->nlmsg_pid = 0;
3988 hdr->nlmsg_seq = RTM_GETNSID;
3989 msg->rtgen_family = AF_UNSPEC;
3990
9d036caa
CB
3991 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3992 if (ret == 0)
3993 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 3994
938980ba
CB
3995 saved_errno = errno;
3996 netlink_close(&nlh);
3997 errno = saved_errno;
3998 if (ret < 0)
3999 return -1;
4000
9d036caa 4001 errno = EINVAL;
938980ba
CB
4002 msg = NLMSG_DATA(hdr);
4003 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4004 if (len < 0)
4005 return -1;
4006
6ce39620
CB
4007#pragma GCC diagnostic push
4008#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4009 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4010 if (tb[__LXC_NETNSA_NSID])
4011 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4012#pragma GCC diagnostic pop
938980ba
CB
4013
4014 return -1;
4015}
e389f2af
CB
4016
4017int lxc_create_network(struct lxc_handler *handler)
4018{
4019 int ret;
4020
4021 /*
4022 * Find gateway addresses from the link device, which is no longer
4023 * accessible inside the container. Do this before creating network
4024 * interfaces, since goto out_delete_net does not work before
4025 * lxc_clone.
4026 */
4027 ret = lxc_find_gateway_addresses(handler);
4028 if (ret) {
4029 ERROR("Failed to find gateway addresses");
4030 return -1;
4031 }
4032
4033 if (handler->am_root) {
4034 ret = lxc_create_network_priv(handler);
4035 if (ret)
4036 return -1;
4037
4038 return lxc_network_move_created_netdev_priv(handler);
4039 }
4040
4041 return lxc_create_network_unpriv(handler);
4042}