]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
Merge pull request #3005 from tomponline/tp-phys-ns-restore
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
27#include <arpa/inet.h>
cb0dc11b
CB
28#include <ctype.h>
29#include <errno.h>
30#include <fcntl.h>
0ad19a3f 31#include <linux/netlink.h>
32#include <linux/rtnetlink.h>
33#include <linux/sockios.h>
cb0dc11b
CB
34#include <net/ethernet.h>
35#include <net/if.h>
36#include <net/if_arp.h>
37#include <netinet/in.h>
d38dd64a
CB
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
cb0dc11b
CB
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
d38dd64a
CB
47#include <time.h>
48#include <unistd.h>
f549edcc 49
d38dd64a 50#include "../include/netns_ifaddrs.h"
7ab1ba02 51#include "af_unix.h"
72d0e1cb 52#include "conf.h"
811ef482 53#include "config.h"
e3233f26 54#include "file_utils.h"
cb0dc11b 55#include "log.h"
8335fd40 56#include "macro.h"
95ea3d1f 57#include "memory_utils.h"
cb0dc11b
CB
58#include "network.h"
59#include "nl.h"
d7b58715 60#include "raw_syscalls.h"
59524108 61#include "syscall_wrappers.h"
0d204771 62#include "utils.h"
0ad19a3f 63
9de31d5a
CB
64#ifndef HAVE_STRLCPY
65#include "include/strlcpy.h"
66#endif
67
ac2cecc4 68lxc_log_define(network, lxc);
f8fee0e2 69
/* Signature shared by the per-type network device hooks stored in the
 * netdev_conf[] and netdev_deconf[] dispatch tables.
 */
typedef int (*instantiate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);

/* The interface name "lo". */
static const char loDev[] = "lo";
811ef482 72
b670016a 73static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 74{
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121}
122
/* Add an IPv4 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
127
/* Add an IPv6 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
132
/* Delete the IPv4 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
137
/* Delete the IPv6 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
142
d4a7da46 143static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144{
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160}
161
162static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163{
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179}
180
811ef482
CB
181static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182{
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
de4855a8 188 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
d34212ad
CB
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
811ef482
CB
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
6d1400b5 215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482
CB
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
6d1400b5 225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
227 goto out_delete;
228 }
229
8da62485
CB
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
811ef482
CB
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
de4855a8 253 } else if (netdev->link[0] != '\0') {
811ef482
CB
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
6d1400b5 268
811ef482 269 if (err) {
6d1400b5 270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
811ef482
CB
273 goto out_delete;
274 }
275 }
276
de4855a8 277 if (netdev->link[0] != '\0') {
811ef482
CB
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
6d1400b5 280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
811ef482
CB
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
6d1400b5 290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
292 goto out_delete;
293 }
294
d4a7da46 295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
811ef482 307 if (netdev->upscript) {
14a7b0f9
CB
308 char *argv[] = {
309 "veth",
310 netdev->link,
990b9ac3 311 veth1,
14a7b0f9
CB
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
811ef482
CB
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
811ef482
CB
330 return -1;
331}
332
333static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334{
335 char peerbuf[IFNAMSIZ], *peer;
336 int err;
3bef7b7b 337 unsigned int mtu = 0;
811ef482 338
de4855a8 339 if (netdev->link[0] == '\0') {
811ef482
CB
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
344 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peerbuf))
346 return -1;
347
348 peer = lxc_mkifname(peerbuf);
349 if (!peer)
350 return -1;
351
352 err = lxc_macvlan_create(netdev->link, peer,
353 netdev->priv.macvlan_attr.mode);
354 if (err) {
6d1400b5 355 errno = -err;
356 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
357 peer, netdev->link);
966e9f1f 358 goto on_error;
811ef482
CB
359 }
360
361 netdev->ifindex = if_nametoindex(peer);
362 if (!netdev->ifindex) {
363 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 364 goto on_error;
811ef482
CB
365 }
366
3bef7b7b
TP
367 if (netdev->mtu) {
368 err = lxc_safe_uint(netdev->mtu, &mtu);
369 if (err < 0) {
370 errno = -err;
371 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
372 goto on_error;
373 }
374
375 err = lxc_netdev_set_mtu(peer, mtu);
376 if (err < 0) {
377 errno = -err;
378 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
379 goto on_error;
380 }
381 }
382
811ef482 383 if (netdev->upscript) {
14a7b0f9
CB
384 char *argv[] = {
385 "macvlan",
386 netdev->link,
387 NULL,
388 };
389
390 err = run_script_argv(handler->name,
391 handler->conf->hooks_version, "net",
392 netdev->upscript, "up", argv);
393 if (err < 0)
966e9f1f 394 goto on_error;
811ef482
CB
395 }
396
397 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
398 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
399
400 return 0;
966e9f1f
CB
401
402on_error:
811ef482 403 lxc_netdev_delete_by_name(peer);
811ef482
CB
404 return -1;
405}
406
c9f52382 407static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
408{
409 int err, index, len;
410 struct ifinfomsg *ifi;
411 struct nl_handler nlh;
412 struct rtattr *nest, *nest2;
413 struct nlmsg *answer = NULL, *nlmsg = NULL;
414
415 len = strlen(master);
416 if (len == 1 || len >= IFNAMSIZ)
417 return minus_one_set_errno(EINVAL);
418
419 len = strlen(name);
420 if (len == 1 || len >= IFNAMSIZ)
421 return minus_one_set_errno(EINVAL);
422
423 index = if_nametoindex(master);
424 if (!index)
425 return minus_one_set_errno(EINVAL);
426
427 err = netlink_open(&nlh, NETLINK_ROUTE);
428 if (err)
429 return minus_one_set_errno(-err);
430
431 err = -ENOMEM;
432 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
433 if (!nlmsg)
434 goto out;
435
436 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
437 if (!answer)
438 goto out;
439
440 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
441 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
442
443 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
444 if (!ifi) {
445 goto out;
446 }
447 ifi->ifi_family = AF_UNSPEC;
448
449 err = -EPROTO;
450 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
451 if (!nest)
452 goto out;
453
454 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
455 goto out;
456
457 if (mode) {
458 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
459 if (!nest2)
460 goto out;
461
462 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
463 goto out;
464
465 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
466 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
467 */
468 if (isolation > 0) {
469 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
470 goto out;
471 }
472
473 nla_end_nested(nlmsg, nest2);
474 }
475
476 nla_end_nested(nlmsg, nest);
477
478 if (nla_put_u32(nlmsg, IFLA_LINK, index))
479 goto out;
480
481 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
482 goto out;
483
484 err = netlink_transaction(&nlh, nlmsg, answer);
485out:
486 netlink_close(&nlh);
487 nlmsg_free(answer);
488 nlmsg_free(nlmsg);
489 if (err < 0)
490 return minus_one_set_errno(-err);
491 return 0;
492}
493
494static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
495{
496 char peerbuf[IFNAMSIZ], *peer;
497 int err;
006e135e 498 unsigned int mtu = 0;
c9f52382 499
500 if (netdev->link[0] == '\0') {
501 ERROR("No link for ipvlan network device specified");
502 return -1;
503 }
504
505 err = snprintf(peerbuf, sizeof(peerbuf), "ipXXXXXX");
506 if (err < 0 || (size_t)err >= sizeof(peerbuf))
507 return -1;
508
509 peer = lxc_mkifname(peerbuf);
510 if (!peer)
511 return -1;
512
513 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode, netdev->priv.ipvlan_attr.isolation);
514 if (err) {
515 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"", peer, netdev->link);
516 goto on_error;
517 }
518
519 netdev->ifindex = if_nametoindex(peer);
520 if (!netdev->ifindex) {
521 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
522 goto on_error;
523 }
524
006e135e 525 if (netdev->mtu) {
526 err = lxc_safe_uint(netdev->mtu, &mtu);
527 if (err < 0) {
528 errno = -err;
529 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
530 goto on_error;
531 }
532
533 err = lxc_netdev_set_mtu(peer, mtu);
534 if (err < 0) {
535 errno = -err;
536 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
537 goto on_error;
538 }
539 }
540
c9f52382 541 if (netdev->upscript) {
542 char *argv[] = {
543 "ipvlan",
544 netdev->link,
545 NULL,
546 };
547
548 err = run_script_argv(handler->name,
549 handler->conf->hooks_version, "net",
550 netdev->upscript, "up", argv);
551 if (err < 0)
552 goto on_error;
553 }
554
555 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d",
556 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
557
558 return 0;
559
560on_error:
561 lxc_netdev_delete_by_name(peer);
562 return -1;
563}
564
811ef482
CB
565static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
566{
567 char peer[IFNAMSIZ];
568 int err;
569 static uint16_t vlan_cntr = 0;
570 unsigned int mtu = 0;
571
de4855a8 572 if (netdev->link[0] == '\0') {
811ef482
CB
573 ERROR("No link for vlan network device specified");
574 return -1;
575 }
576
577 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
578 if (err < 0 || (size_t)err >= sizeof(peer))
579 return -1;
580
581 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
582 if (err) {
6d1400b5 583 errno = -err;
584 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
585 peer, netdev->link);
811ef482
CB
586 return -1;
587 }
588
589 netdev->ifindex = if_nametoindex(peer);
590 if (!netdev->ifindex) {
591 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 592 goto on_error;
593 }
594
595 if (netdev->mtu) {
596 err = lxc_safe_uint(netdev->mtu, &mtu);
597 if (err < 0) {
598 errno = -err;
599 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
600 goto on_error;
601 }
602
603 err = lxc_netdev_set_mtu(peer, mtu);
604 if (err) {
605 errno = -err;
606 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
607 goto on_error;
608 }
811ef482
CB
609 }
610
3a73d9f1 611 if (netdev->upscript) {
612 char *argv[] = {
613 "vlan",
614 netdev->link,
615 NULL,
616 };
617
618 err = run_script_argv(handler->name,
619 handler->conf->hooks_version, "net",
620 netdev->upscript, "up", argv);
19abca58 621 if (err < 0) {
3e2a7b08 622 goto on_error;
19abca58 623 }
3a73d9f1 624 }
625
3bef7b7b 626 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
811ef482 627 peer, netdev->ifindex);
811ef482
CB
628
629 return 0;
3e2a7b08 630
631on_error:
632 lxc_netdev_delete_by_name(peer);
633 return -1;
811ef482
CB
634}
635
636static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
637{
0b154989 638 int err, mtu_orig = 0;
3bef7b7b 639 unsigned int mtu = 0;
14a7b0f9 640
de4855a8 641 if (netdev->link[0] == '\0') {
811ef482
CB
642 ERROR("No link for physical interface specified");
643 return -1;
644 }
645
790255cf
CB
646 /* Note that we're retrieving the container's ifindex in the host's
647 * network namespace because we need it to move the device from the
648 * host's network namespace to the container's network namespace later
649 * on.
650 * Note that netdev->link will contain the name of the physical network
651 * device in the host's namespace.
652 */
811ef482
CB
653 netdev->ifindex = if_nametoindex(netdev->link);
654 if (!netdev->ifindex) {
655 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
656 return -1;
657 }
658
790255cf
CB
659 /* Store the ifindex of the host's network device in the host's
660 * namespace.
661 */
662 netdev->priv.phys_attr.ifindex = netdev->ifindex;
663
0b154989
TP
664 /* Get original device MTU setting and store for restoration after container shutdown. */
665 mtu_orig = netdev_get_mtu(netdev->ifindex);
666 if (mtu_orig < 0) {
667 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
668 return minus_one_set_errno(-mtu_orig);
669 }
670
671 netdev->priv.phys_attr.mtu = mtu_orig;
672
3bef7b7b
TP
673 if (netdev->mtu) {
674 err = lxc_safe_uint(netdev->mtu, &mtu);
675 if (err < 0) {
676 errno = -err;
677 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
678 return -1;
679 }
14a7b0f9 680
3bef7b7b
TP
681 err = lxc_netdev_set_mtu(netdev->link, mtu);
682 if (err < 0) {
683 errno = -err;
684 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
685 return -1;
686 }
687 }
688
689 if (netdev->upscript) {
690 char *argv[] = {
691 "phys",
692 netdev->link,
693 NULL,
694 };
695
696 err = run_script_argv(handler->name,
697 handler->conf->hooks_version, "net",
698 netdev->upscript, "up", argv);
699 if (err < 0) {
700 return -1;
701 }
702 }
703
704 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
811ef482
CB
705
706 return 0;
707}
708
709static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
710{
14a7b0f9
CB
711 int ret;
712 char *argv[] = {
713 "empty",
714 NULL,
715 };
716
811ef482 717 netdev->ifindex = 0;
14a7b0f9
CB
718 if (!netdev->upscript)
719 return 0;
720
721 ret = run_script_argv(handler->name, handler->conf->hooks_version,
722 "net", netdev->upscript, "up", argv);
723 if (ret < 0)
724 return -1;
725
811ef482
CB
726 return 0;
727}
728
729static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
730{
731 netdev->ifindex = 0;
732 return 0;
733}
734
735static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
736 [LXC_NET_VETH] = instantiate_veth,
737 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 738 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
739 [LXC_NET_VLAN] = instantiate_vlan,
740 [LXC_NET_PHYS] = instantiate_phys,
741 [LXC_NET_EMPTY] = instantiate_empty,
742 [LXC_NET_NONE] = instantiate_none,
743};
744
745static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
746{
14a7b0f9
CB
747 int ret;
748 char *argv[] = {
749 "veth",
750 netdev->link,
751 NULL,
752 NULL,
753 };
754
755 if (!netdev->downscript)
756 return 0;
811ef482 757
de4855a8 758 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 759 argv[2] = netdev->priv.veth_attr.pair;
811ef482 760 else
14a7b0f9
CB
761 argv[2] = netdev->priv.veth_attr.veth1;
762
763 ret = run_script_argv(handler->name,
764 handler->conf->hooks_version, "net",
765 netdev->downscript, "down", argv);
766 if (ret < 0)
767 return -1;
811ef482 768
811ef482
CB
769 return 0;
770}
771
772static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
773{
14a7b0f9
CB
774 int ret;
775 char *argv[] = {
776 "macvlan",
777 netdev->link,
778 NULL,
779 };
780
781 if (!netdev->downscript)
782 return 0;
783
784 ret = run_script_argv(handler->name, handler->conf->hooks_version,
785 "net", netdev->downscript, "down", argv);
786 if (ret < 0)
787 return -1;
811ef482 788
811ef482
CB
789 return 0;
790}
791
c9f52382 792static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
793{
794 int ret;
795 char *argv[] = {
796 "ipvlan",
797 netdev->link,
798 NULL,
799 };
800
801 if (!netdev->downscript)
802 return 0;
803
804 ret = run_script_argv(handler->name, handler->conf->hooks_version,
805 "net", netdev->downscript, "down", argv);
806 if (ret < 0)
807 return -1;
808
809 return 0;
810}
811
811ef482
CB
812static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
813{
3a73d9f1 814 int ret;
815 char *argv[] = {
816 "vlan",
817 netdev->link,
818 NULL,
819 };
820
821 if (!netdev->downscript)
822 return 0;
823
824 ret = run_script_argv(handler->name, handler->conf->hooks_version,
825 "net", netdev->downscript, "down", argv);
826 if (ret < 0)
827 return -1;
828
811ef482
CB
829 return 0;
830}
831
832static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
833{
14a7b0f9
CB
834 int ret;
835 char *argv[] = {
836 "phys",
837 netdev->link,
838 NULL,
839 };
840
841 if (!netdev->downscript)
842 return 0;
843
844 ret = run_script_argv(handler->name, handler->conf->hooks_version,
845 "net", netdev->downscript, "down", argv);
846 if (ret < 0)
847 return -1;
811ef482 848
811ef482
CB
849 return 0;
850}
851
852static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
853{
14a7b0f9
CB
854 int ret;
855 char *argv[] = {
856 "empty",
857 NULL,
858 };
859
860 if (!netdev->downscript)
861 return 0;
862
863 ret = run_script_argv(handler->name, handler->conf->hooks_version,
864 "net", netdev->downscript, "down", argv);
865 if (ret < 0)
866 return -1;
811ef482 867
811ef482
CB
868 return 0;
869}
870
/* "none" network type: nothing to tear down. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
875
876static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
877 [LXC_NET_VETH] = shutdown_veth,
878 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 879 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
880 [LXC_NET_VLAN] = shutdown_vlan,
881 [LXC_NET_PHYS] = shutdown_phys,
882 [LXC_NET_EMPTY] = shutdown_empty,
883 [LXC_NET_NONE] = shutdown_none,
884};
885
0037ab49
TP
886static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
887{
888 int err;
889 struct nl_handler nlh;
890 struct ifinfomsg *ifi;
891 struct nlmsg *nlmsg = NULL;
892
893 err = netlink_open(&nlh, NETLINK_ROUTE);
894 if (err)
895 return err;
896
897 err = -ENOMEM;
898 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
899 if (!nlmsg)
900 goto out;
901
902 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
903 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
904
905 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
906 if (!ifi)
907 goto out;
908 ifi->ifi_family = AF_UNSPEC;
909 ifi->ifi_index = ifindex;
910
911 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
912 goto out;
913
914 if (ifname != NULL) {
915 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
916 goto out;
917 }
918
919 err = netlink_transaction(&nlh, nlmsg, nlmsg);
920out:
921 netlink_close(&nlh);
922 nlmsg_free(nlmsg);
923 return err;
924}
925
ebc73a67 926int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 927{
ebc73a67 928 int err;
0ad19a3f 929 struct nl_handler nlh;
06f976ca 930 struct ifinfomsg *ifi;
ebc73a67 931 struct nlmsg *nlmsg = NULL;
0ad19a3f 932
3cfc0f3a
MN
933 err = netlink_open(&nlh, NETLINK_ROUTE);
934 if (err)
935 return err;
0ad19a3f 936
3cfc0f3a 937 err = -ENOMEM;
0ad19a3f 938 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
939 if (!nlmsg)
940 goto out;
941
ebc73a67 942 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
943 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
944
945 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
946 if (!ifi)
947 goto out;
06f976ca
SZ
948 ifi->ifi_family = AF_UNSPEC;
949 ifi->ifi_index = ifindex;
0ad19a3f 950
951 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
952 goto out;
953
8d357196
DY
954 if (ifname != NULL) {
955 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
956 goto out;
957 }
958
3cfc0f3a 959 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 960out:
961 netlink_close(&nlh);
962 nlmsg_free(nlmsg);
963 return err;
964}
965
ebc73a67
CB
966/* If we are asked to move a wireless interface, then we must actually move its
967 * phyN device. Detect that condition and return the physname here. The physname
968 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
969 */
970#define PHYSNAME "/sys/class/net/%s/phy80211/name"
ebc73a67 971static char *is_wlan(const char *ifname)
e5848d39 972{
b0293710 973 __do_free char *path = NULL;
ebc73a67 974 int i, ret;
e5848d39 975 long physlen;
ebc73a67 976 size_t len;
e5848d39 977 FILE *f;
ebc73a67 978 char *physname = NULL;
e5848d39 979
ebc73a67 980 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 981 path = must_realloc(NULL, len + 1);
e5848d39 982 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 983 if (ret < 0 || (size_t)ret >= len)
e5848d39 984 goto bad;
ebc73a67 985
ebc73a67
CB
986 f = fopen(path, "r");
987 if (!f)
e5848d39 988 goto bad;
ebc73a67 989
1a0e70ac 990 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
991 fseek(f, 0, SEEK_END);
992 physlen = ftell(f);
993 fseek(f, 0, SEEK_SET);
7d1cde93
SX
994 if (physlen < 0) {
995 fclose(f);
0382c0da 996 goto bad;
7d1cde93 997 }
ebc73a67
CB
998
999 physname = malloc(physlen + 1);
ee54ea9a 1000 if (!physname) {
acf47e1b 1001 fclose(f);
e5848d39 1002 goto bad;
ee54ea9a 1003 }
ebc73a67
CB
1004
1005 memset(physname, 0, physlen + 1);
e5848d39
SH
1006 ret = fread(physname, 1, physlen, f);
1007 fclose(f);
1008 if (ret < 0)
1009 goto bad;
1010
ebc73a67 1011 for (i = 0; i < physlen; i++) {
e5848d39
SH
1012 if (physname[i] == '\n')
1013 physname[i] = '\0';
ebc73a67 1014
e5848d39
SH
1015 if (physname[i] == '\0')
1016 break;
1017 }
1018
1019 return physname;
1020
1021bad:
f10fad2f 1022 free(physname);
e5848d39
SH
1023 return NULL;
1024}
1025
ebc73a67
CB
/* Rename the interface @old to @new inside the network namespace of the
 * process @pid.
 *
 * The work is done in a forked child that switches to the namespace first; the
 * parent only waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child from here on: it must terminate with _exit() on every path.
	 * Returning would let the child continue executing the caller's code
	 * as a second copy of the parent.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1043
ebc73a67
CB
/* Move the wireless phy @physname into the network namespace of @pid by
 * running "iw phy <physname> set netns <pid>", then, if @newname is given,
 * rename @ifname to @newname inside that namespace.
 *
 * Takes ownership of @physname and frees it on every path.
 *
 * Returns 0 on success, -1 if iw is not found on the path, fork() fails or iw
 * fails, otherwise the result of the rename.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	pid_t child;
	char *iw_path;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto done;
	/* Only needed to check that iw exists. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto done;

	if (child == 0) {
		char pidstr[30];

		sprintf(pidstr, "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when exec failed. */
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto done;

	ret = newname ? lxc_netdev_rename_by_name_in_netns(pid, ifname, newname) : 0;

done:
	free(physname);
	return ret;
}
1083
/* Move the interface @ifname into the network namespace of @pid, optionally
 * renaming it to @newname.
 *
 * Wireless interfaces (detected through is_wlan()) are moved via their phy
 * device; everything else is moved by ifindex.
 *
 * Returns -EINVAL if @ifname is NULL or unknown, otherwise the result of the
 * selected move helper.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (!ifname)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (ifindex == 0)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() frees phy. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1102
b84f58b9 1103int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1104{
b84f58b9 1105 int err;
ebc73a67
CB
1106 struct ifinfomsg *ifi;
1107 struct nl_handler nlh;
1108 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1109
3cfc0f3a
MN
1110 err = netlink_open(&nlh, NETLINK_ROUTE);
1111 if (err)
1112 return err;
0ad19a3f 1113
3cfc0f3a 1114 err = -ENOMEM;
0ad19a3f 1115 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1116 if (!nlmsg)
1117 goto out;
1118
06f976ca 1119 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1120 if (!answer)
1121 goto out;
1122
ebc73a67 1123 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1124 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1125
1126 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1127 if (!ifi)
1128 goto out;
06f976ca
SZ
1129 ifi->ifi_family = AF_UNSPEC;
1130 ifi->ifi_index = ifindex;
0ad19a3f 1131
3cfc0f3a 1132 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1133out:
1134 netlink_close(&nlh);
1135 nlmsg_free(answer);
1136 nlmsg_free(nlmsg);
1137 return err;
1138}
1139
b84f58b9
DL
/* Delete the interface called @name. Returns -EINVAL if no such interface is
 * found, otherwise the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1150
1151int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1152{
ebc73a67 1153 int err, len;
06f976ca 1154 struct ifinfomsg *ifi;
ebc73a67
CB
1155 struct nl_handler nlh;
1156 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1157
3cfc0f3a
MN
1158 err = netlink_open(&nlh, NETLINK_ROUTE);
1159 if (err)
1160 return err;
b9a5bb58 1161
b84f58b9 1162 len = strlen(newname);
dae3fdf6 1163 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1164 goto out;
1165
3cfc0f3a 1166 err = -ENOMEM;
b9a5bb58
DL
1167 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1168 if (!nlmsg)
1169 goto out;
1170
06f976ca 1171 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1172 if (!answer)
1173 goto out;
1174
ebc73a67 1175 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1176 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1177
1178 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1179 if (!ifi)
1180 goto out;
06f976ca
SZ
1181 ifi->ifi_family = AF_UNSPEC;
1182 ifi->ifi_index = ifindex;
b84f58b9
DL
1183
1184 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1185 goto out;
b9a5bb58 1186
3cfc0f3a 1187 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1188out:
1189 netlink_close(&nlh);
1190 nlmsg_free(answer);
1191 nlmsg_free(nlmsg);
1192 return err;
1193}
1194
b84f58b9
DL
1195int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1196{
1197 int len, index;
1198
1199 len = strlen(oldname);
dae3fdf6 1200 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1201 return -EINVAL;
1202
1203 index = if_nametoindex(oldname);
1204 if (!index)
1205 return -EINVAL;
1206
1207 return lxc_netdev_rename_by_index(index, newname);
1208}
1209
8befa924 1210int netdev_set_flag(const char *name, int flag)
0ad19a3f 1211{
ebc73a67 1212 int err, index, len;
06f976ca 1213 struct ifinfomsg *ifi;
ebc73a67
CB
1214 struct nl_handler nlh;
1215 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1216
3cfc0f3a
MN
1217 err = netlink_open(&nlh, NETLINK_ROUTE);
1218 if (err)
1219 return err;
0ad19a3f 1220
3cfc0f3a 1221 err = -EINVAL;
0ad19a3f 1222 len = strlen(name);
dae3fdf6 1223 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1224 goto out;
1225
3cfc0f3a 1226 err = -ENOMEM;
0ad19a3f 1227 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1228 if (!nlmsg)
1229 goto out;
1230
06f976ca 1231 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1232 if (!answer)
1233 goto out;
1234
3cfc0f3a 1235 err = -EINVAL;
0ad19a3f 1236 index = if_nametoindex(name);
1237 if (!index)
1238 goto out;
1239
ebc73a67 1240 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1241 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1242
1243 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1244 if (!ifi) {
1245 err = -ENOMEM;
1246 goto out;
1247 }
06f976ca
SZ
1248 ifi->ifi_family = AF_UNSPEC;
1249 ifi->ifi_index = index;
1250 ifi->ifi_change |= IFF_UP;
1251 ifi->ifi_flags |= flag;
0ad19a3f 1252
1253 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1254out:
1255 netlink_close(&nlh);
1256 nlmsg_free(nlmsg);
1257 nlmsg_free(answer);
1258 return err;
1259}
1260
ebc73a67 1261int netdev_get_flag(const char *name, int *flag)
efa1cf45 1262{
ebc73a67 1263 int err, index, len;
a4318300 1264 struct ifinfomsg *ifi;
ebc73a67
CB
1265 struct nl_handler nlh;
1266 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1267
1268 if (!name)
1269 return -EINVAL;
1270
1271 err = netlink_open(&nlh, NETLINK_ROUTE);
1272 if (err)
1273 return err;
1274
1275 err = -EINVAL;
1276 len = strlen(name);
1277 if (len == 1 || len >= IFNAMSIZ)
1278 goto out;
1279
1280 err = -ENOMEM;
1281 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1282 if (!nlmsg)
1283 goto out;
1284
06f976ca 1285 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1286 if (!answer)
1287 goto out;
1288
1289 err = -EINVAL;
1290 index = if_nametoindex(name);
1291 if (!index)
1292 goto out;
1293
06f976ca
SZ
1294 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1295 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1296
1297 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1298 if (!ifi) {
1299 err = -ENOMEM;
1300 goto out;
1301 }
06f976ca
SZ
1302 ifi->ifi_family = AF_UNSPEC;
1303 ifi->ifi_index = index;
efa1cf45
DY
1304
1305 err = netlink_transaction(&nlh, nlmsg, answer);
1306 if (err)
1307 goto out;
1308
06f976ca 1309 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1310
1311 *flag = ifi->ifi_flags;
1312out:
1313 netlink_close(&nlh);
1314 nlmsg_free(nlmsg);
1315 nlmsg_free(answer);
1316 return err;
1317}
1318
1319/*
1320 * \brief Check a interface is up or not.
1321 *
1322 * \param name: name for the interface.
1323 *
1324 * \return int.
1325 * 0 means interface is down.
1326 * 1 means interface is up.
1327 * Others means error happened, and ret-value is the error number.
1328 */
ebc73a67 1329int lxc_netdev_isup(const char *name)
efa1cf45 1330{
ebc73a67 1331 int err, flag;
efa1cf45
DY
1332
1333 err = netdev_get_flag(name, &flag);
1334 if (err)
ebc73a67
CB
1335 return err;
1336
efa1cf45
DY
1337 if (flag & IFF_UP)
1338 return 1;
ebc73a67 1339
efa1cf45 1340 return 0;
efa1cf45
DY
1341}
1342
0130df54
SH
1343int netdev_get_mtu(int ifindex)
1344{
ebc73a67 1345 int answer_len, err, res;
0130df54 1346 struct nl_handler nlh;
06f976ca 1347 struct ifinfomsg *ifi;
0130df54 1348 struct nlmsghdr *msg;
ebc73a67
CB
1349 int readmore = 0, recv_len = 0;
1350 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1351
1352 err = netlink_open(&nlh, NETLINK_ROUTE);
1353 if (err)
1354 return err;
1355
1356 err = -ENOMEM;
1357 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1358 if (!nlmsg)
1359 goto out;
1360
06f976ca 1361 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1362 if (!answer)
1363 goto out;
1364
1365 /* Save the answer buffer length, since it will be overwritten
1366 * on the first receive (and we might need to receive more than
ebc73a67
CB
1367 * once.
1368 */
06f976ca
SZ
1369 answer_len = answer->nlmsghdr->nlmsg_len;
1370
ebc73a67 1371 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1372 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1373
06f976ca 1374 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1375 if (!ifi)
1376 goto out;
06f976ca 1377 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1378
1379 /* Send the request for addresses, which returns all addresses
1380 * on all interfaces. */
1381 err = netlink_send(&nlh, nlmsg);
1382 if (err < 0)
1383 goto out;
1384
6ce39620
CB
1385#pragma GCC diagnostic push
1386#pragma GCC diagnostic ignored "-Wcast-align"
1387
0130df54
SH
1388 do {
1389 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1390 * overwritten by a previous receive.
1391 */
06f976ca 1392 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1393
1394 /* Get the (next) batch of reply messages */
1395 err = netlink_rcv(&nlh, answer);
1396 if (err < 0)
1397 goto out;
1398
1399 recv_len = err;
0130df54
SH
1400
1401 /* Satisfy the typing for the netlink macros */
06f976ca 1402 msg = answer->nlmsghdr;
0130df54
SH
1403
1404 while (NLMSG_OK(msg, recv_len)) {
1405
1406 /* Stop reading if we see an error message */
1407 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1408 struct nlmsgerr *errmsg =
1409 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1410 err = errmsg->error;
1411 goto out;
1412 }
1413
1414 /* Stop reading if we see a NLMSG_DONE message */
1415 if (msg->nlmsg_type == NLMSG_DONE) {
1416 readmore = 0;
1417 break;
1418 }
1419
06f976ca 1420 ifi = NLMSG_DATA(msg);
0130df54
SH
1421 if (ifi->ifi_index == ifindex) {
1422 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1423 int attr_len =
1424 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1425 res = 0;
ebc73a67
CB
1426 while (RTA_OK(rta, attr_len)) {
1427 /* Found a local address for the
1428 * requested interface, return it.
1429 */
0130df54 1430 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1431 memcpy(&res, RTA_DATA(rta),
1432 sizeof(int));
0130df54
SH
1433 err = res;
1434 goto out;
1435 }
1436 rta = RTA_NEXT(rta, attr_len);
1437 }
0130df54
SH
1438 }
1439
ebc73a67
CB
1440 /* Keep reading more data from the socket if the last
1441 * message had the NLF_F_MULTI flag set.
1442 */
0130df54
SH
1443 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1444
ebc73a67 1445 /* Look at the next message received in this buffer. */
0130df54
SH
1446 msg = NLMSG_NEXT(msg, recv_len);
1447 }
1448 } while (readmore);
1449
6ce39620
CB
1450#pragma GCC diagnostic pop
1451
ebc73a67 1452 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1453 err = -1;
1454
1455out:
1456 netlink_close(&nlh);
1457 nlmsg_free(answer);
1458 nlmsg_free(nlmsg);
1459 return err;
1460}
1461
d472214b 1462int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1463{
ebc73a67 1464 int err, index, len;
06f976ca 1465 struct ifinfomsg *ifi;
ebc73a67
CB
1466 struct nl_handler nlh;
1467 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1468
3cfc0f3a
MN
1469 err = netlink_open(&nlh, NETLINK_ROUTE);
1470 if (err)
1471 return err;
75d09f83 1472
3cfc0f3a 1473 err = -EINVAL;
75d09f83 1474 len = strlen(name);
dae3fdf6 1475 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1476 goto out;
1477
3cfc0f3a 1478 err = -ENOMEM;
75d09f83
DL
1479 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1480 if (!nlmsg)
1481 goto out;
1482
06f976ca 1483 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1484 if (!answer)
1485 goto out;
1486
3cfc0f3a 1487 err = -EINVAL;
75d09f83
DL
1488 index = if_nametoindex(name);
1489 if (!index)
1490 goto out;
1491
ebc73a67 1492 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1493 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1494
1495 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1496 if (!ifi) {
1497 err = -ENOMEM;
1498 goto out;
1499 }
06f976ca
SZ
1500 ifi->ifi_family = AF_UNSPEC;
1501 ifi->ifi_index = index;
75d09f83
DL
1502
1503 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1504 goto out;
1505
1506 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1507out:
1508 netlink_close(&nlh);
1509 nlmsg_free(nlmsg);
1510 nlmsg_free(answer);
1511 return err;
1512}
1513
d472214b 1514int lxc_netdev_up(const char *name)
0ad19a3f 1515{
d472214b 1516 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1517}
1518
/* Take the device called @name down: netdev_set_flag() with no flag bits. */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
1523
497353b6 1524int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1525{
ebc73a67 1526 int err, len;
06f976ca 1527 struct ifinfomsg *ifi;
ebc73a67 1528 struct nl_handler nlh;
0ad19a3f 1529 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1530 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1531
3cfc0f3a
MN
1532 err = netlink_open(&nlh, NETLINK_ROUTE);
1533 if (err)
1534 return err;
0ad19a3f 1535
3cfc0f3a 1536 err = -EINVAL;
0ad19a3f 1537 len = strlen(name1);
dae3fdf6 1538 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1539 goto out;
1540
1541 len = strlen(name2);
dae3fdf6 1542 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1543 goto out;
1544
3cfc0f3a 1545 err = -ENOMEM;
0ad19a3f 1546 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1547 if (!nlmsg)
1548 goto out;
1549
06f976ca 1550 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1551 if (!answer)
1552 goto out;
1553
06f976ca 1554 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1555 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1556 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1557
1558 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1559 if (!ifi)
1560 goto out;
06f976ca 1561 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1562
3cfc0f3a 1563 err = -EINVAL;
79e68309 1564 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1565 if (!nest1)
1566 goto out;
1567
1568 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1569 goto out;
1570
1571 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1572 if (!nest2)
1573 goto out;
1574
1575 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1576 if (!nest3)
1577 goto out;
1578
06f976ca 1579 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1580 if (!ifi) {
1581 err = -ENOMEM;
06f976ca 1582 goto out;
25a9939b 1583 }
0ad19a3f 1584
1585 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1586 goto out;
1587
1588 nla_end_nested(nlmsg, nest3);
0ad19a3f 1589 nla_end_nested(nlmsg, nest2);
0ad19a3f 1590 nla_end_nested(nlmsg, nest1);
1591
1592 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1593 goto out;
1594
3cfc0f3a 1595 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1596out:
1597 netlink_close(&nlh);
1598 nlmsg_free(answer);
1599 nlmsg_free(nlmsg);
1600 return err;
1601}
1602
ebc73a67 1603/* TODO: merge with lxc_macvlan_create */
7c11d57a 1604int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1605{
ebc73a67 1606 int err, len, lindex;
06f976ca 1607 struct ifinfomsg *ifi;
ebc73a67 1608 struct nl_handler nlh;
26c39028 1609 struct rtattr *nest, *nest2;
ebc73a67 1610 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1611
3cfc0f3a
MN
1612 err = netlink_open(&nlh, NETLINK_ROUTE);
1613 if (err)
1614 return err;
26c39028 1615
3cfc0f3a 1616 err = -EINVAL;
26c39028 1617 len = strlen(master);
dae3fdf6 1618 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1619 goto err3;
1620
1621 len = strlen(name);
dae3fdf6 1622 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1623 goto err3;
1624
3cfc0f3a 1625 err = -ENOMEM;
26c39028
JHS
1626 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1627 if (!nlmsg)
1628 goto err3;
1629
06f976ca 1630 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1631 if (!answer)
1632 goto err2;
1633
3cfc0f3a 1634 err = -EINVAL;
26c39028
JHS
1635 lindex = if_nametoindex(master);
1636 if (!lindex)
1637 goto err1;
1638
06f976ca 1639 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1640 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1641 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1642
1643 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1644 if (!ifi) {
1645 err = -ENOMEM;
1646 goto err1;
1647 }
06f976ca 1648 ifi->ifi_family = AF_UNSPEC;
26c39028 1649
79e68309 1650 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1651 if (!nest)
1652 goto err1;
1653
1654 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1655 goto err1;
1656
1657 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1658 if (!nest2)
1659 goto err1;
e892973e 1660
26c39028
JHS
1661 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1662 goto err1;
e892973e 1663
26c39028 1664 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1665 nla_end_nested(nlmsg, nest);
1666
1667 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1668 goto err1;
1669
1670 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1671 goto err1;
1672
3cfc0f3a 1673 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1674err1:
1675 nlmsg_free(answer);
1676err2:
1677 nlmsg_free(nlmsg);
1678err3:
1679 netlink_close(&nlh);
1680 return err;
1681}
1682
e892973e 1683int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1684{
ebc73a67 1685 int err, index, len;
06f976ca 1686 struct ifinfomsg *ifi;
ebc73a67 1687 struct nl_handler nlh;
e892973e 1688 struct rtattr *nest, *nest2;
ebc73a67 1689 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1690
3cfc0f3a
MN
1691 err = netlink_open(&nlh, NETLINK_ROUTE);
1692 if (err)
1693 return err;
0ad19a3f 1694
3cfc0f3a 1695 err = -EINVAL;
0ad19a3f 1696 len = strlen(master);
dae3fdf6 1697 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1698 goto out;
1699
1700 len = strlen(name);
dae3fdf6 1701 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1702 goto out;
1703
3cfc0f3a 1704 err = -ENOMEM;
0ad19a3f 1705 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1706 if (!nlmsg)
1707 goto out;
1708
06f976ca 1709 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1710 if (!answer)
1711 goto out;
1712
3cfc0f3a 1713 err = -EINVAL;
0ad19a3f 1714 index = if_nametoindex(master);
1715 if (!index)
1716 goto out;
1717
06f976ca 1718 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1719 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1720 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1721
1722 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1723 if (!ifi) {
1724 err = -ENOMEM;
1725 goto out;
1726 }
06f976ca 1727 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1728
79e68309 1729 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1730 if (!nest)
1731 goto out;
1732
1733 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1734 goto out;
1735
e892973e
DL
1736 if (mode) {
1737 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1738 if (!nest2)
1739 goto out;
1740
1741 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1742 goto out;
1743
1744 nla_end_nested(nlmsg, nest2);
1745 }
1746
0ad19a3f 1747 nla_end_nested(nlmsg, nest);
1748
1749 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1750 goto out;
1751
1752 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1753 goto out;
1754
3cfc0f3a 1755 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1756out:
1757 netlink_close(&nlh);
1758 nlmsg_free(answer);
1759 nlmsg_free(nlmsg);
1760 return err;
1761}
1762
/* Open @path write-only and write the string @value (without its terminating
 * NUL) to it with lxc_write_nointr().
 *
 * Returns 0 on success, or -errno if open() fails or lxc_write_nointr()
 * returns a negative value.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int procfd;

	procfd = open(path, O_WRONLY);
	if (procfd < 0)
		return -errno;

	if (lxc_write_nointr(procfd, value, strlen(value)) < 0)
		ret = -errno;

	close(procfd);
	return ret;
}
1778
6509154d 1779static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1780{
1781 int ret;
1782 char path[PATH_MAX];
1783 char buf[1] = "";
1784
1785 if (family != AF_INET && family != AF_INET6)
1786 return minus_one_set_errno(EINVAL);
1787
1788 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1789 family == AF_INET ? "ipv4" : "ipv6", ifname,
1790 "forwarding");
1791 if (ret < 0 || (size_t)ret >= PATH_MAX)
1792 return minus_one_set_errno(E2BIG);
1793
1794 return lxc_read_file_expect(path, buf, 1, "1");
1795}
1796
0ad19a3f 1797static int neigh_proxy_set(const char *ifname, int family, int flag)
1798{
9ba8130c 1799 int ret;
419590da 1800 char path[PATH_MAX];
0ad19a3f 1801
1802 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1803 return -EINVAL;
0ad19a3f 1804
419590da 1805 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1806 family == AF_INET ? "ipv4" : "ipv6", ifname,
1807 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 1808 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 1809 return -E2BIG;
0ad19a3f 1810
ebc73a67 1811 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1812}
1813
6509154d 1814static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1815{
1816 int ret;
1817 char path[PATH_MAX];
1818 char buf[1] = "";
1819
1820 if (family != AF_INET && family != AF_INET6)
1821 return minus_one_set_errno(EINVAL);
1822
1823 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1824 family == AF_INET ? "ipv4" : "ipv6", ifname,
1825 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1826 if (ret < 0 || (size_t)ret >= PATH_MAX)
1827 return minus_one_set_errno(E2BIG);
1828
1829 return lxc_read_file_expect(path, buf, 1, "1");
1830}
1831
/* Enable neighbour proxying on @name for @family via neigh_proxy_set(). */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1836
/* Disable neighbour proxying on @name for @family via neigh_proxy_set(). */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
1841
/* Return the value of the hexadecimal digit @c, or -1 if it is not one. */
static int lxc_mac_hex_digit(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char;
	 * a plain char may be negative.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are written to
 * sa_data. Each octet is one or two hex digits; octets may be separated by
 * ':'. Parsing stops at the end of the string or after ETH_ALEN octets,
 * whichever comes first, so shorter inputs and trailing text after the last
 * octet are accepted.
 *
 * Returns 0 on success and -EINVAL if an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned val;
		int hi, lo;
		char c;

		/* First digit of the octet is mandatory. */
		hi = lxc_mac_hex_digit(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/* Second digit is optional: the octet may end at ':' or at
		 * the end of the string.
		 */
		c = *macaddr;
		lo = lxc_mac_hex_digit(c);
		if (lo >= 0)
			val = ((unsigned)hi << 4) | (unsigned)lo;
		else if (c == ':' || c == '\0')
			val = (unsigned)hi;
		else
			return -EINVAL;

		/* Consume the second character unless it is the terminator. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1886
ebc73a67
CB
1887static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1888 void *acast, int prefix)
0ad19a3f 1889{
ebc73a67 1890 int addrlen, err;
06f976ca 1891 struct ifaddrmsg *ifa;
ebc73a67
CB
1892 struct nl_handler nlh;
1893 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1894
ebc73a67
CB
1895 addrlen = family == AF_INET ? sizeof(struct in_addr)
1896 : sizeof(struct in6_addr);
4bf1968d 1897
3cfc0f3a
MN
1898 err = netlink_open(&nlh, NETLINK_ROUTE);
1899 if (err)
1900 return err;
0ad19a3f 1901
3cfc0f3a 1902 err = -ENOMEM;
0ad19a3f 1903 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1904 if (!nlmsg)
1905 goto out;
1906
06f976ca 1907 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1908 if (!answer)
1909 goto out;
1910
06f976ca 1911 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1912 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1913 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1914
1915 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1916 if (!ifa)
25a9939b 1917 goto out;
06f976ca
SZ
1918 ifa->ifa_prefixlen = prefix;
1919 ifa->ifa_index = ifindex;
1920 ifa->ifa_family = family;
1921 ifa->ifa_scope = 0;
acf47e1b 1922
3cfc0f3a 1923 err = -EINVAL;
4bf1968d 1924 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1925 goto out;
1926
4bf1968d 1927 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1928 goto out;
1929
d8948a52 1930 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1931 goto out;
1932
ebc73a67 1933 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1934 err = -EPROTONOSUPPORT;
79881dc6
DL
1935 if (family == AF_INET6 &&
1936 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1937 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1938 goto out;
0ad19a3f 1939
3cfc0f3a 1940 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1941out:
1942 netlink_close(&nlh);
1943 nlmsg_free(answer);
1944 nlmsg_free(nlmsg);
1945 return err;
1946}
1947
/* Add the IPv6 address @addr/@prefix to the interface with index @ifindex.
 * Thin wrapper around ip_addr_add() with AF_INET6; @mcast is passed in the
 * broadcast position and @acast in the anycast position.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
1954
ebc73a67
CB
/* Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex. Thin wrapper around ip_addr_add() with
 * AF_INET and no anycast address.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
1960
ebc73a67
CB
/* Extract an address from the RTM_NEWADDR message @msg.
 *
 * The attributes are walked in order; every IFA_ADDRESS or IFA_LOCAL payload
 * is copied into *@res, and the walk stops at the first IFA_LOCAL. An
 * IFA_ADDRESS seen earlier is therefore overwritten by a later IFA_LOCAL.
 * If *@res is NULL a buffer of the address size is malloc()ed and stored
 * there (so @res should be an in_addr** or in6_addr**); an existing buffer
 * is reused.
 *
 * Returns 0 if the message belongs to another address family (nothing is
 * touched) or once the walk is finished, and -1 if an attribute has an
 * unexpected payload length or the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *hdr = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	for (attr = IFA_RTA(hdr); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check should already guarantee the length, but
		 * verify it before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred result; stop once we have it. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2014
19a26f82
MK
2015static int ip_addr_get(int family, int ifindex, void **res)
2016{
ebc73a67 2017 int answer_len, err;
06f976ca 2018 struct ifaddrmsg *ifa;
ebc73a67 2019 struct nl_handler nlh;
19a26f82 2020 struct nlmsghdr *msg;
ebc73a67
CB
2021 int readmore = 0, recv_len = 0;
2022 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2023
2024 err = netlink_open(&nlh, NETLINK_ROUTE);
2025 if (err)
2026 return err;
2027
2028 err = -ENOMEM;
2029 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2030 if (!nlmsg)
2031 goto out;
2032
06f976ca 2033 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2034 if (!answer)
2035 goto out;
2036
ebc73a67
CB
2037 /* Save the answer buffer length, since it will be overwritten on the
2038 * first receive (and we might need to receive more than once).
2039 */
06f976ca
SZ
2040 answer_len = answer->nlmsghdr->nlmsg_len;
2041
ebc73a67 2042 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2043 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2044
06f976ca 2045 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2046 if (!ifa)
2047 goto out;
06f976ca 2048 ifa->ifa_family = family;
19a26f82 2049
ebc73a67
CB
2050 /* Send the request for addresses, which returns all addresses on all
2051 * interfaces.
2052 */
19a26f82
MK
2053 err = netlink_send(&nlh, nlmsg);
2054 if (err < 0)
2055 goto out;
19a26f82 2056
6ce39620
CB
2057#pragma GCC diagnostic push
2058#pragma GCC diagnostic ignored "-Wcast-align"
2059
19a26f82
MK
2060 do {
2061 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2062 * overwritten by a previous receive.
2063 */
06f976ca 2064 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2065
ebc73a67 2066 /* Get the (next) batch of reply messages. */
19a26f82
MK
2067 err = netlink_rcv(&nlh, answer);
2068 if (err < 0)
2069 goto out;
2070
2071 recv_len = err;
2072 err = 0;
2073
ebc73a67 2074 /* Satisfy the typing for the netlink macros. */
06f976ca 2075 msg = answer->nlmsghdr;
19a26f82
MK
2076
2077 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2078 /* Stop reading if we see an error message. */
19a26f82 2079 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2080 struct nlmsgerr *errmsg =
2081 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2082 err = errmsg->error;
2083 goto out;
2084 }
2085
ebc73a67 2086 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2087 if (msg->nlmsg_type == NLMSG_DONE) {
2088 readmore = 0;
2089 break;
2090 }
2091
2092 if (msg->nlmsg_type != RTM_NEWADDR) {
2093 err = -1;
2094 goto out;
2095 }
2096
06f976ca
SZ
2097 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2098 if (ifa->ifa_index == ifindex) {
2099 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2100 err = -1;
2101 goto out;
2102 }
2103
ebc73a67 2104 /* Found a result, stop searching. */
19a26f82
MK
2105 if (*res)
2106 goto out;
2107 }
2108
ebc73a67
CB
2109 /* Keep reading more data from the socket if the last
2110 * message had the NLF_F_MULTI flag set.
2111 */
19a26f82
MK
2112 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2113
ebc73a67 2114 /* Look at the next message received in this buffer. */
19a26f82
MK
2115 msg = NLMSG_NEXT(msg, recv_len);
2116 }
2117 } while (readmore);
2118
6ce39620
CB
2119#pragma GCC diagnostic pop
2120
19a26f82 2121 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2122 * error.
2123 */
19a26f82
MK
2124 err = -1;
2125
2126out:
2127 netlink_close(&nlh);
2128 nlmsg_free(answer);
2129 nlmsg_free(nlmsg);
2130 return err;
2131}
2132
/* Look up an IPv6 address on the interface with index @ifindex; thin wrapper
 * around ip_addr_get() with AF_INET6. The result is stored in *@res.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2137
/* Look up an IPv4 address on the interface with index @ifindex; thin wrapper
 * around ip_addr_get() with AF_INET. The result is stored in *@res.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2142
f8fee0e2
MK
2143static int ip_gateway_add(int family, int ifindex, void *gw)
2144{
ebc73a67 2145 int addrlen, err;
f8fee0e2 2146 struct nl_handler nlh;
06f976ca 2147 struct rtmsg *rt;
ebc73a67 2148 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2149
ebc73a67
CB
2150 addrlen = family == AF_INET ? sizeof(struct in_addr)
2151 : sizeof(struct in6_addr);
f8fee0e2
MK
2152
2153 err = netlink_open(&nlh, NETLINK_ROUTE);
2154 if (err)
2155 return err;
2156
2157 err = -ENOMEM;
2158 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2159 if (!nlmsg)
2160 goto out;
2161
06f976ca 2162 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2163 if (!answer)
2164 goto out;
2165
06f976ca 2166 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2167 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2168 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2169
2170 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2171 if (!rt)
2172 goto out;
06f976ca
SZ
2173 rt->rtm_family = family;
2174 rt->rtm_table = RT_TABLE_MAIN;
2175 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2176 rt->rtm_protocol = RTPROT_BOOT;
2177 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2178 /* "default" destination */
06f976ca 2179 rt->rtm_dst_len = 0;
f8fee0e2
MK
2180
2181 err = -EINVAL;
a2f9a670 2182
2183 /* If gateway address not supplied, then a device route will be created instead */
2184 if (gw != NULL) {
2185 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2186 goto out;
2187 }
f8fee0e2
MK
2188
2189 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2190 * addresses for the gateway.
2191 */
f8fee0e2
MK
2192 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2193 goto out;
2194
2195 err = netlink_transaction(&nlh, nlmsg, answer);
2196out:
2197 netlink_close(&nlh);
2198 nlmsg_free(answer);
2199 nlmsg_free(nlmsg);
2200 return err;
2201}
2202
/* Add an IPv4 default route via @gw on the interface with index @ifindex;
 * thin wrapper around ip_gateway_add() with AF_INET.
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2207
/* Add an IPv6 default route via @gw on the interface with index @ifindex;
 * thin wrapper around ip_gateway_add() with AF_INET6.
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/* Decide whether @bridge should be treated as an openvswitch bridge.
 *
 * The test is purely negative: true is returned when stat() on
 * /sys/class/net/<bridge>/bridge fails with ENOENT. false is returned when
 * the path exists, when stat() fails for any other reason, or when the path
 * does not fit into the local buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	char sysfs_path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(sysfs_path, sizeof(sysfs_path),
		       "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_path))
		return false;

	return stat(sysfs_path, &st) < 0 && errno == ENOENT;
}
2229
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks through their void *
 * parameter.
 */
struct ovs_veth_args {
	const char *bridge; /* bridge argument passed to ovs-vsctl */
	const char *nic;    /* port argument passed to ovs-vsctl */
};
2234
cb0dc11b
CB
2235/* Called from a background thread - when nic goes away, remove it from the
2236 * bridge.
c43cbc04 2237 */
581c75e7 2238static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2239{
581c75e7 2240 struct ovs_veth_args *args = data;
cb0dc11b 2241
581c75e7
CB
2242 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2243 (char *)NULL);
2244 return -1;
c43cbc04
SH
2245}
2246
581c75e7 2247int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2248{
c43cbc04 2249 int ret;
419590da 2250 char cmd_output[PATH_MAX];
581c75e7 2251 struct ovs_veth_args args;
6ad22d06 2252
581c75e7
CB
2253 args.bridge = bridge;
2254 args.nic = nic;
2255 ret = run_command(cmd_output, sizeof(cmd_output),
2256 lxc_ovs_delete_port_exec, (void *)&args);
2257 if (ret < 0) {
2258 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2259 "%s", bridge, nic, cmd_output);
6ad22d06 2260 return -1;
581c75e7 2261 }
0d204771 2262
581c75e7
CB
2263 return 0;
2264}
ebc73a67 2265
581c75e7
CB
2266static int lxc_ovs_attach_bridge_exec(void *data)
2267{
2268 struct ovs_veth_args *args = data;
ebc73a67 2269
581c75e7
CB
2270 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2271 (char *)NULL);
2272 return -1;
2273}
ebc73a67 2274
581c75e7
CB
2275static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2276{
2277 int ret;
419590da 2278 char cmd_output[PATH_MAX];
581c75e7 2279 struct ovs_veth_args args;
ebc73a67 2280
581c75e7
CB
2281 args.bridge = bridge;
2282 args.nic = nic;
2283 ret = run_command(cmd_output, sizeof(cmd_output),
2284 lxc_ovs_attach_bridge_exec, (void *)&args);
2285 if (ret < 0) {
2286 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2287 bridge, nic, cmd_output);
2288 return -1;
c43cbc04 2289 }
0d204771 2290
581c75e7 2291 return 0;
0d204771 2292}
0d204771 2293
581c75e7 2294int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2295{
ebc73a67 2296 int err, fd, index;
9de31d5a 2297 size_t retlen;
0ad19a3f 2298 struct ifreq ifr;
2299
dae3fdf6 2300 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2301 return -EINVAL;
0ad19a3f 2302
2303 index = if_nametoindex(ifname);
2304 if (!index)
3cfc0f3a 2305 return -EINVAL;
0ad19a3f 2306
0d204771 2307 if (is_ovs_bridge(bridge))
581c75e7 2308 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2309
ad9429e5 2310 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2311 if (fd < 0)
3cfc0f3a 2312 return -errno;
0ad19a3f 2313
9de31d5a 2314 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2315 if (retlen >= IFNAMSIZ) {
2316 close(fd);
9de31d5a 2317 return -E2BIG;
42cc4083 2318 }
9de31d5a 2319
ebc73a67 2320 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2321 ifr.ifr_ifindex = index;
7d163508 2322 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2323 close(fd);
3cfc0f3a
MN
2324 if (err)
2325 err = -errno;
0ad19a3f 2326
2327 return err;
2328}
72d0e1cb 2329
ebc73a67 2330static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2331 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2332 [LXC_NET_VETH] = "veth",
2333 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2334 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2335 [LXC_NET_PHYS] = "phys",
b343592b
BP
2336 [LXC_NET_VLAN] = "vlan",
2337 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2338};
2339
2340const char *lxc_net_type_to_str(int type)
2341{
2342 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2343 return NULL;
ebc73a67 2344
72d0e1cb
SG
2345 return lxc_network_types[type];
2346}
8befa924 2347
ebc73a67 2348static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2349
966e9f1f 2350char *lxc_mkifname(char *template)
a0265685 2351{
2d7bf744 2352 int ret;
b1e44ed1 2353 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2354 char name[IFNAMSIZ];
2355 bool exists = false;
2356 size_t i = 0;
280cc35f 2357#ifdef HAVE_RAND_R
2358 unsigned int seed;
2359
2360 seed = randseed(false);
2361#else
2362
2363 (void)randseed(true);
2364#endif
a0265685 2365
535e8859
CB
2366 if (strlen(template) >= IFNAMSIZ)
2367 return NULL;
2368
ebc73a67 2369 /* Get all the network interfaces. */
b1e44ed1 2370 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2371 if (ret < 0) {
6d1400b5 2372 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2373 return NULL;
2374 }
a0265685 2375
ebc73a67 2376 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2377 for (;;) {
966e9f1f 2378 name[0] = '\0';
94b1cade 2379 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2380
966e9f1f 2381 exists = false;
280cc35f 2382
a0265685
SG
2383 for (i = 0; i < strlen(name); i++) {
2384 if (name[i] == 'X') {
2385#ifdef HAVE_RAND_R
8523344a 2386 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2387#else
8523344a 2388 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2389#endif
2390 }
2391 }
2392
2393 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2394 if (!strcmp(ifa->ifa_name, name)) {
2395 exists = true;
a0265685
SG
2396 break;
2397 }
2398 }
2399
966e9f1f 2400 if (!exists)
a0265685 2401 break;
a0265685
SG
2402 }
2403
b1e44ed1 2404 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2405 (void)strlcpy(template, name, strlen(template) + 1);
2406
2407 return template;
a0265685
SG
2408}
2409
8befa924
SH
/* Set the first octet of the hardware address of interface @veth1 to 0xfe.
 *
 * The current address is read with SIOCGIFHWADDR, its first byte is replaced
 * and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG when @veth1 does not fit into an interface
 * name, and -errno when socket() or one of the ioctl() calls fails.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, sockfd;
	struct ifreq ifr;

	/* Validate the name before acquiring any resource. */
	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ)
		return -E2BIG;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0) {
		/* Record errno now; close() below may overwrite it. */
		err = -errno;
		goto out;
	}

	ifr.ifr_hwaddr.sa_data[0] = 0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	if (err < 0)
		err = -errno;
	else
		err = 0;

out:
	close(sockfd);
	return err;
}
811ef482
CB
2439
2440int lxc_find_gateway_addresses(struct lxc_handler *handler)
2441{
2442 struct lxc_list *network = &handler->conf->network;
2443 struct lxc_list *iterator;
2444 struct lxc_netdev *netdev;
2445 int link_index;
2446
2447 lxc_list_for_each(iterator, network) {
2448 netdev = iterator->elem;
2449
2450 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2451 continue;
2452
2453 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2454 ERROR("Automatic gateway detection is only supported "
2455 "for veth and macvlan");
2456 return -1;
2457 }
2458
de4855a8 2459 if (netdev->link[0] == '\0') {
811ef482
CB
2460 ERROR("Automatic gateway detection needs a link interface");
2461 return -1;
2462 }
2463
2464 link_index = if_nametoindex(netdev->link);
2465 if (!link_index)
2466 return -EINVAL;
2467
2468 if (netdev->ipv4_gateway_auto) {
2469 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2470 ERROR("Failed to automatically find ipv4 gateway "
2471 "address from link interface \"%s\"", netdev->link);
2472 return -1;
2473 }
2474 }
2475
2476 if (netdev->ipv6_gateway_auto) {
2477 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2478 ERROR("Failed to automatically find ipv6 gateway "
2479 "address from link interface \"%s\"", netdev->link);
2480 return -1;
2481 }
2482 }
2483 }
2484
2485 return 0;
2486}
2487
2488#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2489static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2490 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2491{
2492 int ret;
2493 pid_t child;
2494 int bytes, pipefd[2];
2495 char *token, *saveptr = NULL;
095ead80 2496 char netdev_link[IFNAMSIZ];
419590da 2497 char buffer[PATH_MAX] = {0};
94b1cade 2498 size_t retlen;
811ef482
CB
2499
2500 if (netdev->type != LXC_NET_VETH) {
2501 ERROR("Network type %d not support for unprivileged use", netdev->type);
2502 return -1;
2503 }
2504
2505 ret = pipe(pipefd);
2506 if (ret < 0) {
2507 SYSERROR("Failed to create pipe");
2508 return -1;
2509 }
2510
2511 child = fork();
2512 if (child < 0) {
2513 SYSERROR("Failed to create new process");
2514 close(pipefd[0]);
2515 close(pipefd[1]);
2516 return -1;
2517 }
2518
2519 if (child == 0) {
8335fd40 2520 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2521
2522 close(pipefd[0]);
2523
2524 ret = dup2(pipefd[1], STDOUT_FILENO);
2525 if (ret >= 0)
2526 ret = dup2(pipefd[1], STDERR_FILENO);
2527 close(pipefd[1]);
2528 if (ret < 0) {
2529 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2530 _exit(EXIT_FAILURE);
811ef482
CB
2531 }
2532
de4855a8 2533 if (netdev->link[0] != '\0')
9de31d5a 2534 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2535 else
9de31d5a
CB
2536 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2537 if (retlen >= IFNAMSIZ) {
2538 SYSERROR("Invalid network device name");
2539 _exit(EXIT_FAILURE);
2540 }
811ef482 2541
8335fd40
CB
2542 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2543 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2544 _exit(EXIT_FAILURE);
8335fd40 2545 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2546
2547 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2548 lxcname, pidstr, netdev_link,
de4855a8
CB
2549 netdev->name[0] != '\0' ? netdev->name : "(null)");
2550 if (netdev->name[0] != '\0')
811ef482
CB
2551 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2552 lxcpath, lxcname, pidstr, "veth", netdev_link,
2553 netdev->name, (char *)NULL);
2554 else
2555 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2556 lxcpath, lxcname, pidstr, "veth", netdev_link,
2557 (char *)NULL);
2558 SYSERROR("Failed to execute lxc-user-nic");
78070056 2559 _exit(EXIT_FAILURE);
811ef482
CB
2560 }
2561
2562 /* close the write-end of the pipe */
2563 close(pipefd[1]);
2564
419590da 2565 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2566 if (bytes < 0) {
74c6e2b0 2567 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2568 close(pipefd[0]);
6b9f82a9
CB
2569 } else {
2570 buffer[bytes - 1] = '\0';
811ef482 2571 }
811ef482
CB
2572
2573 ret = wait_for_pid(child);
2574 close(pipefd[0]);
6b9f82a9 2575 if (ret != 0 || bytes < 0) {
811ef482
CB
2576 ERROR("lxc-user-nic failed to configure requested network: %s",
2577 buffer[0] != '\0' ? buffer : "(null)");
2578 return -1;
2579 }
2580 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2581
2582 /* netdev->name */
2583 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2584 if (!token) {
2585 ERROR("Failed to parse lxc-user-nic output");
811ef482 2586 return -1;
74c6e2b0 2587 }
811ef482 2588
def4def6
CB
2589 memset(netdev->name, 0, IFNAMSIZ);
2590 memcpy(netdev->name, token, IFNAMSIZ - 1);
811ef482 2591
74c6e2b0 2592 /* netdev->ifindex */
811ef482 2593 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2594 if (!token) {
2595 ERROR("Failed to parse lxc-user-nic output");
811ef482 2596 return -1;
74c6e2b0 2597 }
811ef482 2598
74c6e2b0
CB
2599 ret = lxc_safe_int(token, &netdev->ifindex);
2600 if (ret < 0) {
6d1400b5 2601 errno = -ret;
2602 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2603 return -1;
2604 }
2605
74c6e2b0 2606 /* netdev->priv.veth_attr.veth1 */
811ef482 2607 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2608 if (!token) {
2609 ERROR("Failed to parse lxc-user-nic output");
811ef482 2610 return -1;
74c6e2b0 2611 }
811ef482 2612
94b1cade
DJ
2613 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2614 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2615 ERROR("Host side veth device name returned by lxc-user-nic is "
2616 "too long");
2617 return -E2BIG;
2618 }
74c6e2b0
CB
2619
2620 /* netdev->priv.veth_attr.ifindex */
2621 token = strtok_r(NULL, ":", &saveptr);
2622 if (!token) {
2623 ERROR("Failed to parse lxc-user-nic output");
2624 return -1;
2625 }
2626
2627 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2628 if (ret < 0) {
6d1400b5 2629 errno = -ret;
2630 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2631 return -1;
2632 }
2633
4d781681 2634 if (netdev->upscript) {
2635 char *argv[] = {
2636 "veth",
2637 netdev->link,
2638 netdev->priv.veth_attr.veth1,
2639 NULL,
2640 };
2641
2642 ret = run_script_argv(lxcname,
2643 hooks_version, "net",
2644 netdev->upscript, "up", argv);
2645 if (ret < 0)
2646 return -1;
2647 }
2648
811ef482
CB
2649 return 0;
2650}
2651
f0ecc19d 2652static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2653 struct lxc_netdev *netdev,
2654 const char *netns_path)
811ef482
CB
2655{
2656 int bytes, ret;
2657 pid_t child;
2658 int pipefd[2];
419590da 2659 char buffer[PATH_MAX] = {0};
811ef482
CB
2660
2661 if (netdev->type != LXC_NET_VETH) {
2662 ERROR("Network type %d not support for unprivileged use", netdev->type);
2663 return -1;
2664 }
2665
2666 ret = pipe(pipefd);
2667 if (ret < 0) {
2668 SYSERROR("Failed to create pipe");
2669 return -1;
2670 }
2671
2672 child = fork();
2673 if (child < 0) {
2674 SYSERROR("Failed to create new process");
2675 close(pipefd[0]);
2676 close(pipefd[1]);
2677 return -1;
2678 }
2679
2680 if (child == 0) {
8843fde4 2681 char *hostveth;
811ef482
CB
2682
2683 close(pipefd[0]);
2684
2685 ret = dup2(pipefd[1], STDOUT_FILENO);
2686 if (ret >= 0)
2687 ret = dup2(pipefd[1], STDERR_FILENO);
2688 close(pipefd[1]);
2689 if (ret < 0) {
2690 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2691 _exit(EXIT_FAILURE);
811ef482
CB
2692 }
2693
8843fde4
CB
2694 if (netdev->priv.veth_attr.pair[0] != '\0')
2695 hostveth = netdev->priv.veth_attr.pair;
2696 else
2697 hostveth = netdev->priv.veth_attr.veth1;
2698 if (hostveth[0] == '\0') {
74c6e2b0 2699 SYSERROR("Host side veth device name is missing");
a30b9023 2700 _exit(EXIT_FAILURE);
74c6e2b0
CB
2701 }
2702
de4855a8 2703 if (netdev->link[0] == '\0') {
811ef482 2704 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2705 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2706 _exit(EXIT_FAILURE);
74c6e2b0 2707 }
811ef482 2708
811ef482 2709 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2710 lxcname, netns_path, netdev->link, hostveth);
811ef482 2711 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2712 lxcname, netns_path, "veth", netdev->link, hostveth,
2713 (char *)NULL);
811ef482 2714 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2715 _exit(EXIT_FAILURE);
811ef482
CB
2716 }
2717
2718 close(pipefd[1]);
2719
419590da 2720 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2721 if (bytes < 0) {
2722 SYSERROR("Failed to read from pipe file descriptor.");
2723 close(pipefd[0]);
6b9f82a9
CB
2724 } else {
2725 buffer[bytes - 1] = '\0';
811ef482 2726 }
811ef482 2727
6b9f82a9
CB
2728 ret = wait_for_pid(child);
2729 close(pipefd[0]);
2730 if (ret != 0 || bytes < 0) {
811ef482
CB
2731 ERROR("lxc-user-nic failed to delete requested network: %s",
2732 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2733 return -1;
2734 }
2735
811ef482
CB
2736 return 0;
2737}
2738
1bd8d726
CB
2739bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2740{
2741 int ret;
2742 struct lxc_list *iterator;
2743 struct lxc_list *network = &handler->conf->network;
2744 /* strlen("/proc/") = 6
2745 * +
8335fd40 2746 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2747 * +
2748 * strlen("/fd/") = 4
2749 * +
8335fd40 2750 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2751 * +
2752 * \0
2753 */
8335fd40 2754 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2755
2756 *netns_path = '\0';
2757
28d9e29e 2758 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2759 DEBUG("Cannot not guarantee safe deletion of network devices. "
2760 "Manual cleanup maybe needed");
2761 return false;
2762 }
2763
2764 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2765 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2766 if (ret < 0 || ret >= sizeof(netns_path))
2767 return false;
2768
2769 lxc_list_for_each(iterator, network) {
2770 char *hostveth = NULL;
2771 struct lxc_netdev *netdev = iterator->elem;
2772
2773 /* We can only delete devices whose ifindex we have. If we don't
2774 * have the index it means that we didn't create it.
2775 */
2776 if (!netdev->ifindex)
2777 continue;
2778
2779 if (netdev->type == LXC_NET_PHYS) {
2780 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2781 netdev->link);
2782 if (ret < 0)
2783 WARN("Failed to rename interface with index %d "
2784 "to its initial name \"%s\"",
2785 netdev->ifindex, netdev->link);
2786 else
2787 TRACE("Renamed interface with index %d to its "
2788 "initial name \"%s\"",
2789 netdev->ifindex, netdev->link);
66a7c406 2790 goto clear_ifindices;
1bd8d726
CB
2791 }
2792
2793 ret = netdev_deconf[netdev->type](handler, netdev);
2794 if (ret < 0)
2795 WARN("Failed to deconfigure network device");
2796
2797 if (netdev->type != LXC_NET_VETH)
66a7c406 2798 goto clear_ifindices;
1bd8d726 2799
c869be20 2800 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2801 goto clear_ifindices;
1bd8d726 2802
8843fde4
CB
2803 if (netdev->priv.veth_attr.pair[0] != '\0')
2804 hostveth = netdev->priv.veth_attr.pair;
2805 else
2806 hostveth = netdev->priv.veth_attr.veth1;
2807 if (hostveth[0] == '\0')
66a7c406 2808 goto clear_ifindices;
8843fde4 2809
1bd8d726
CB
2810 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2811 handler->name, netdev,
2812 netns_path);
2813 if (ret < 0) {
1bd8d726 2814 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2815 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2816 goto clear_ifindices;
1bd8d726
CB
2817 }
2818 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2819 netdev->link);
66a7c406
CB
2820
2821clear_ifindices:
ad2ddfcd 2822 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
2823 * have cached stale data which would cause it to fail on reboot
2824 * we're we don't re-read the on-disk config file.
2825 */
2826 netdev->ifindex = 0;
2827 if (netdev->type == LXC_NET_PHYS) {
2828 netdev->priv.phys_attr.ifindex = 0;
2829 } else if (netdev->type == LXC_NET_VETH) {
2830 netdev->priv.veth_attr.veth1[0] = '\0';
2831 netdev->priv.veth_attr.ifindex = 0;
2832 }
1bd8d726
CB
2833 }
2834
bb84beda 2835 return true;
1bd8d726
CB
2836}
2837
/* Argument bundle passed through run_command() to the "ip neigh ... proxy"
 * exec callbacks.
 */
struct ip_proxy_args {
	/* IP address in textual form. */
	const char *ip;
	/* Name of the network device the proxy entry belongs to. */
	const char *dev;
};
2842
2843static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2844{
2845 struct ip_proxy_args *args = data;
2846
2847 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2848 return -1;
2849}
2850
2851static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2852{
2853 struct ip_proxy_args *args = data;
2854
2855 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2856 return -1;
2857}
2858
2859static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2860{
2861 int ret;
2862 char cmd_output[PATH_MAX];
2863 struct ip_proxy_args args = {
2864 .ip = ip,
2865 .dev = dev,
2866 };
2867
2868 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2869 if (ret < 0) {
2870 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2871 return -1;
2872 }
2873
2874 return 0;
2875}
2876
2877static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2878{
2879 int ret;
2880 char cmd_output[PATH_MAX];
2881 struct ip_proxy_args args = {
2882 .ip = ip,
2883 .dev = dev,
2884 };
2885
2886 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2887 if (ret < 0) {
2888 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2889 return -1;
2890 }
2891
2892 return 0;
2893}
2894
2895static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2896 struct lxc_list *cur, *next;
2897 struct lxc_inetdev *inet4dev;
2898 struct lxc_inet6dev *inet6dev;
2899 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2900 int err = 0;
2901 unsigned int lo_ifindex = 0;
6509154d 2902
2903 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2904 if (!lxc_list_empty(&netdev->ipv4)) {
2905 /* Check for net.ipv4.conf.[link].forwarding=1 */
2906 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2907 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2908 return minus_one_set_errno(EINVAL);
2909 }
2910 }
2911
2912 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2913 if (!lxc_list_empty(&netdev->ipv6)) {
2914 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2915 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2916 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2917 return minus_one_set_errno(EINVAL);
2918 }
2919
2920 /* Check for net.ipv6.conf.[link].forwarding=1 */
2921 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2922 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2923 return minus_one_set_errno(EINVAL);
2924 }
2925 }
2926
b670016a 2927 /* Perform IPVLAN specific checks. */
2928 if (netdev->type == LXC_NET_IPVLAN) {
2929 /* Check mode is l3s as other modes do not work with l2proxy. */
2930 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2931 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2932 return minus_one_set_errno(EINVAL);
2933 }
2934
2935 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
2936 lo_ifindex = if_nametoindex(loDev);
2937 if (lo_ifindex == 0) {
2938 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loDev);
2939 return minus_one_set_errno(EINVAL);
2940 }
2941 }
2942
6509154d 2943 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2944 inet4dev = cur->elem;
2945 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2946 return minus_one_set_errno(-errno);
2947
2948 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2949 return minus_one_set_errno(EINVAL);
b670016a 2950
2951 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2952 if (netdev->type == LXC_NET_IPVLAN) {
2953 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2954 if (err < 0) {
2955 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loDev);
2956 return minus_one_set_errno(-err);
2957 }
2958 }
6509154d 2959 }
2960
2961 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2962 inet6dev = cur->elem;
2963 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2964 return minus_one_set_errno(-errno);
2965
2966 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2967 return minus_one_set_errno(EINVAL);
b670016a 2968
2969 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2970 if (netdev->type == LXC_NET_IPVLAN) {
2971 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2972 if (err < 0) {
2973 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loDev);
2974 return minus_one_set_errno(-err);
2975 }
2976 }
6509154d 2977 }
2978
2979 return 0;
2980}
2981
b670016a 2982static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
2983 char bufinet4[INET_ADDRSTRLEN];
2984 unsigned int errCount = 0;
2985
2986 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4))) {
2987 SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
2988 return minus_one_set_errno(EINVAL);
2989 }
2990
2991 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
2992 if (lo_ifindex > 0) {
2993 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
2994 errCount++;
2995 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
2996 }
2997 }
2998
2999 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3000 if (link[0] != '\0') {
3001 if (lxc_del_ip_neigh_proxy(bufinet4, link) < 0)
3002 errCount++;
3003 }
3004
3005 if (errCount > 0)
3006 return minus_one_set_errno(EINVAL);
3007
3008 return 0;
3009}
3010
3011static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
3012 char bufinet6[INET6_ADDRSTRLEN];
3013 unsigned int errCount = 0;
3014
3015 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6))) {
3016 SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
3017 return minus_one_set_errno(EINVAL);
3018 }
3019
3020 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3021 if (lo_ifindex > 0) {
3022 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
3023 errCount++;
3024 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3025 }
3026 }
3027
3028 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3029 if (link[0] != '\0') {
3030 if (lxc_del_ip_neigh_proxy(bufinet6, link) < 0)
3031 errCount++;
3032 }
3033
3034 if (errCount > 0)
3035 return minus_one_set_errno(EINVAL);
3036
3037 return 0;
3038}
3039
6509154d 3040static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3041 unsigned int lo_ifindex = 0;
3042 unsigned int errCount = 0;
6509154d 3043 struct lxc_list *cur, *next;
3044 struct lxc_inetdev *inet4dev;
3045 struct lxc_inet6dev *inet6dev;
6509154d 3046
b670016a 3047 /* Perform IPVLAN specific checks. */
3048 if (netdev->type == LXC_NET_IPVLAN) {
3049 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3050 lo_ifindex = if_nametoindex(loDev);
3051 if (lo_ifindex == 0) {
3052 errCount++;
3053 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loDev);
6509154d 3054 }
b670016a 3055 }
6509154d 3056
b670016a 3057 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3058 inet4dev = cur->elem;
3059 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3060 errCount++;
6509154d 3061 }
3062
3063 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3064 inet6dev = cur->elem;
b670016a 3065 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3066 errCount++;
6509154d 3067 }
3068
b670016a 3069 if (errCount > 0)
6509154d 3070 return minus_one_set_errno(EINVAL);
3071
3072 return 0;
3073}
3074
811ef482
CB
3075int lxc_create_network_priv(struct lxc_handler *handler)
3076{
811ef482
CB
3077 struct lxc_list *iterator;
3078 struct lxc_list *network = &handler->conf->network;
3079
d0fbc7ba 3080 if (!handler->am_root)
811ef482
CB
3081 return 0;
3082
3083 lxc_list_for_each(iterator, network) {
3084 struct lxc_netdev *netdev = iterator->elem;
3085
3086 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3087 ERROR("Invalid network configuration type %d", netdev->type);
3088 return -1;
3089 }
3090
6509154d 3091 /* Setup l2proxy entries if enabled and used with a link property */
3092 if (netdev->l2proxy && netdev->link[0] != '\0') {
3093 if (lxc_setup_l2proxy(netdev)) {
3094 ERROR("Failed to setup l2proxy");
3095 return -1;
3096 }
3097 }
3098
811ef482
CB
3099 if (netdev_conf[netdev->type](handler, netdev)) {
3100 ERROR("Failed to create network device");
3101 return -1;
3102 }
811ef482
CB
3103 }
3104
3105 return 0;
3106}
3107
f0ecc19d 3108int lxc_network_move_created_netdev_priv(const char *lxcpath, const char *lxcname,
74c6e2b0 3109 struct lxc_list *network, pid_t pid)
811ef482 3110{
535e8859 3111 int ret;
811ef482
CB
3112 char ifname[IFNAMSIZ];
3113 struct lxc_list *iterator;
3114
e0010464 3115 if (am_guest_unpriv())
74c6e2b0 3116 return 0;
811ef482
CB
3117
3118 lxc_list_for_each(iterator, network) {
3119 struct lxc_netdev *netdev = iterator->elem;
3120
811ef482
CB
3121 if (!netdev->ifindex)
3122 continue;
3123
3124 /* retrieve the name of the interface */
3125 if (!if_indextoname(netdev->ifindex, ifname)) {
3126 ERROR("No interface corresponding to ifindex \"%d\"",
3127 netdev->ifindex);
3128 return -1;
3129 }
3130
535e8859
CB
3131 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3132 if (ret) {
6d1400b5 3133 errno = -ret;
3134 SYSERROR("Failed to move network device \"%s\" to "
3135 "network namespace %d", ifname, pid);
811ef482
CB
3136 return -1;
3137 }
3138
3139 DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
790255cf 3140 "of %d",
535e8859 3141 ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
811ef482
CB
3142 pid);
3143 }
3144
3145 return 0;
3146}
3147
f0ecc19d 3148int lxc_create_network_unpriv(const char *lxcpath, const char *lxcname,
4d781681 3149 struct lxc_list *network, pid_t pid, unsigned int hooks_version)
74c6e2b0
CB
3150{
3151 struct lxc_list *iterator;
3152
e0010464 3153 if (!am_guest_unpriv())
74c6e2b0
CB
3154 return 0;
3155
3156 lxc_list_for_each(iterator, network) {
3157 struct lxc_netdev *netdev = iterator->elem;
3158
3159 if (netdev->type == LXC_NET_EMPTY)
3160 continue;
3161
3162 if (netdev->type == LXC_NET_NONE)
3163 continue;
3164
3165 if (netdev->type != LXC_NET_VETH) {
3166 ERROR("Networks of type %s are not supported by "
3167 "unprivileged containers",
3168 lxc_net_type_to_str(netdev->type));
3169 return -1;
3170 }
3171
3172 if (netdev->mtu)
3173 INFO("mtu ignored due to insufficient privilege");
3174
4d781681 3175 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev, pid, hooks_version))
74c6e2b0
CB
3176 return -1;
3177 }
3178
3179 return 0;
3180}
3181
1bd8d726 3182bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3183{
3184 int ret;
3185 struct lxc_list *iterator;
3186 struct lxc_list *network = &handler->conf->network;
1bd8d726 3187
811ef482
CB
3188 lxc_list_for_each(iterator, network) {
3189 char *hostveth = NULL;
3190 struct lxc_netdev *netdev = iterator->elem;
3191
3192 /* We can only delete devices whose ifindex we have. If we don't
3193 * have the index it means that we didn't create it.
3194 */
3195 if (!netdev->ifindex)
3196 continue;
3197
6509154d 3198 /* Delete l2proxy entries if enabled and used with a link property */
3199 if (netdev->l2proxy && netdev->link[0] != '\0') {
3200 if (lxc_delete_l2proxy(netdev))
3201 WARN("Failed to delete all l2proxy config");
3202 /* Don't return, let the network be cleaned up as normal. */
3203 }
3204
811ef482
CB
3205 if (netdev->type == LXC_NET_PHYS) {
3206 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3207 if (ret < 0)
3208 WARN("Failed to rename interface with index %d "
b809f232
CB
3209 "from \"%s\" to its initial name \"%s\"",
3210 netdev->ifindex, netdev->name, netdev->link);
0b154989 3211 else {
29589196
CB
3212 TRACE("Renamed interface with index %d from "
3213 "\"%s\" to its initial name \"%s\"",
3214 netdev->ifindex, netdev->name,
3215 netdev->link);
0b154989
TP
3216
3217 /* Restore original MTU */
3218 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3219 if (ret < 0) {
3220 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3221 netdev->link, netdev->priv.phys_attr.mtu);
3222 } else {
3223 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3224 netdev->link, netdev->priv.phys_attr.mtu);
3225 }
3226 }
66a7c406 3227 goto clear_ifindices;
811ef482
CB
3228 }
3229
3230 ret = netdev_deconf[netdev->type](handler, netdev);
3231 if (ret < 0)
3232 WARN("Failed to deconfigure network device");
3233
3234 /* Recent kernels remove the virtual interfaces when the network
3235 * namespace is destroyed but in case we did not move the
3236 * interface to the network namespace, we have to destroy it.
3237 */
1bd8d726 3238 ret = lxc_netdev_delete_by_index(netdev->ifindex);
78ab281c
CB
3239 if (ret < 0) {
3240 if (errno != ENODEV) {
3241 WARN("Failed to remove interface \"%s\" with index %d",
3242 netdev->name[0] != '\0' ? netdev->name : "(null)",
3243 netdev->ifindex);
3244 goto clear_ifindices;
3245 }
3246 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
24548539
CB
3247 netdev->name[0] != '\0' ? netdev->name : "(null)",
3248 netdev->ifindex);
811ef482 3249 }
1bd8d726 3250 INFO("Removed interface \"%s\" with index %d",
52845118
CB
3251 netdev->name[0] != '\0' ? netdev->name : "(null)",
3252 netdev->ifindex);
811ef482
CB
3253
3254 if (netdev->type != LXC_NET_VETH)
66a7c406 3255 goto clear_ifindices;
811ef482 3256
811ef482
CB
3257 /* Explicitly delete host veth device to prevent lingering
3258 * devices. We had issues in LXD around this.
3259 */
de4855a8 3260 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3261 hostveth = netdev->priv.veth_attr.pair;
3262 else
3263 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3264 if (hostveth[0] == '\0')
66a7c406 3265 goto clear_ifindices;
811ef482
CB
3266
3267 ret = lxc_netdev_delete_by_name(hostveth);
3268 if (ret < 0) {
24548539
CB
3269 WARN("Failed to remove interface \"%s\" from \"%s\"",
3270 hostveth, netdev->link);
66a7c406 3271 goto clear_ifindices;
811ef482
CB
3272 }
3273 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3274
c869be20 3275 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3276 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3277 netdev->ifindex = 0;
3278 netdev->priv.veth_attr.ifindex = 0;
3279 goto clear_ifindices;
811ef482
CB
3280 }
3281
3282 /* Delete the openvswitch port. */
3283 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3284 if (ret < 0)
3285 WARN("Failed to remove port \"%s\" from openvswitch "
3286 "bridge \"%s\"", hostveth, netdev->link);
3287 else
3288 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3289 hostveth, netdev->link);
3290
66a7c406 3291clear_ifindices:
ad2ddfcd 3292 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3293 * have cached stale data which would cause it to fail on reboot
3294 * where we don't re-read the on-disk config file.
3295 */
3296 netdev->ifindex = 0;
3297 if (netdev->type == LXC_NET_PHYS) {
3298 netdev->priv.phys_attr.ifindex = 0;
3299 } else if (netdev->type == LXC_NET_VETH) {
3300 netdev->priv.veth_attr.veth1[0] = '\0';
3301 netdev->priv.veth_attr.ifindex = 0;
3302 }
811ef482
CB
3303 }
3304
bb84beda 3305 return true;
811ef482
CB
3306}
3307
3308int lxc_requests_empty_network(struct lxc_handler *handler)
3309{
3310 struct lxc_list *network = &handler->conf->network;
3311 struct lxc_list *iterator;
3312 bool found_none = false, found_nic = false;
3313
3314 if (lxc_list_empty(network))
3315 return 0;
3316
3317 lxc_list_for_each(iterator, network) {
3318 struct lxc_netdev *netdev = iterator->elem;
3319
3320 if (netdev->type == LXC_NET_NONE)
3321 found_none = true;
3322 else
3323 found_nic = true;
3324 }
3325 if (found_none && !found_nic)
3326 return 1;
3327 return 0;
3328}
3329
3330/* try to move physical nics to the init netns */
b809f232 3331int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3332{
3333 int ret;
b809f232 3334 int oldfd;
811ef482 3335 char ifname[IFNAMSIZ];
b809f232 3336 struct lxc_list *iterator;
28d9e29e 3337 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3338 struct lxc_conf *conf = handler->conf;
811ef482 3339
b809f232
CB
3340 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3341 * the parent network namespace. We won't have this capability if we are
3342 * unprivileged.
3343 */
d0fbc7ba 3344 if (!handler->am_root)
b809f232 3345 return 0;
811ef482 3346
b809f232 3347 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3348
0037ab49 3349 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3350 if (oldfd < 0) {
3351 SYSERROR("Failed to preserve network namespace");
b809f232 3352 return -1;
811ef482
CB
3353 }
3354
b809f232 3355 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3356 if (ret < 0) {
3357 SYSERROR("Failed to enter network namespace");
3358 close(oldfd);
b809f232 3359 return -1;
811ef482
CB
3360 }
3361
b809f232
CB
3362 lxc_list_for_each(iterator, &conf->network) {
3363 struct lxc_netdev *netdev = iterator->elem;
811ef482 3364
b809f232
CB
3365 if (netdev->type != LXC_NET_PHYS)
3366 continue;
3367
3368 /* Retrieve the name of the interface in the container's network
3369 * namespace.
3370 */
3371 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3372 WARN("No interface corresponding to ifindex %d",
b809f232 3373 netdev->ifindex);
811ef482
CB
3374 continue;
3375 }
b809f232 3376
0037ab49 3377 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3378 if (ret < 0)
811ef482
CB
3379 WARN("Error moving network device \"%s\" back to "
3380 "network namespace", ifname);
b809f232
CB
3381 else
3382 TRACE("Moved network device \"%s\" back to network "
3383 "namespace", ifname);
811ef482 3384 }
811ef482 3385
b809f232 3386 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3387 close(oldfd);
b809f232
CB
3388 if (ret < 0) {
3389 SYSERROR("Failed to enter network namespace");
3390 return -1;
3391 }
3392
3393 return 0;
811ef482
CB
3394}
3395
/* Set the hardware (mac) address of a network interface.
 *
 * @hwaddr: mac address as a string, converted with lxc_convert_mac()
 * @ifname: name of the interface to configure
 *
 * Returns 0 on success. Returns -1 if the address cannot be converted or the
 * socket cannot be created, and the (non-zero) ioctl() return value if
 * SIOCSIFHWADDR fails.
 */
static int setup_hw_addr(char *hwaddr, const char *ifname)
{
	struct sockaddr sockaddr;
	struct ifreq ifr;
	int ret, fd, saved_errno;

	ret = lxc_convert_mac(hwaddr, &sockaddr);
	if (ret) {
		errno = -ret;
		SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
		return -1;
	}

	/* Start from a zeroed request and copy the name as a string so we
	 * never read past the end of a short @ifname.
	 */
	memset(&ifr, 0, sizeof(ifr));
	(void)strlcpy(ifr.ifr_name, ifname, IFNAMSIZ);
	memcpy(&ifr.ifr_hwaddr, &sockaddr, sizeof(sockaddr));

	fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (fd < 0) {
		SYSERROR("Failed to create socket");
		return -1;
	}

	ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
	/* Keep the ioctl() errno intact across close(). */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	if (ret) {
		SYSERROR("Failed to perform ioctl");
		return ret;
	}

	DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
	      ifr.ifr_name);

	return 0;
}
3428
3429static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3430{
3431 struct lxc_list *iterator;
3432 int err;
3433
3434 lxc_list_for_each(iterator, ip) {
3435 struct lxc_inetdev *inetdev = iterator->elem;
3436
3437 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3438 &inetdev->bcast, inetdev->prefix);
3439 if (err) {
6d1400b5 3440 errno = -err;
3441 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3442 "with ifindex %d", ifindex);
811ef482
CB
3443 return -1;
3444 }
3445 }
3446
3447 return 0;
3448}
3449
3450static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3451{
3452 struct lxc_list *iterator;
3453 int err;
3454
3455 lxc_list_for_each(iterator, ip) {
3456 struct lxc_inet6dev *inet6dev = iterator->elem;
3457
3458 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3459 &inet6dev->mcast, &inet6dev->acast,
3460 inet6dev->prefix);
3461 if (err) {
6d1400b5 3462 errno = -err;
3463 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3464 "with ifindex %d", ifindex);
811ef482
CB
3465 return -1;
3466 }
3467 }
3468
3469 return 0;
3470}
3471
3472static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3473{
3474 char ifname[IFNAMSIZ];
3475 int err;
3476 const char *net_type_name;
3477 char *current_ifname = ifname;
009d6127 3478 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3479
3480 /* empty network namespace */
3481 if (!netdev->ifindex) {
3482 if (netdev->flags & IFF_UP) {
3483 err = lxc_netdev_up("lo");
3484 if (err) {
6d1400b5 3485 errno = -err;
3486 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3487 return -1;
3488 }
3489 }
3490
3491 if (netdev->type == LXC_NET_EMPTY)
3492 return 0;
3493
3494 if (netdev->type == LXC_NET_NONE)
3495 return 0;
3496
3497 if (netdev->type != LXC_NET_VETH) {
3498 net_type_name = lxc_net_type_to_str(netdev->type);
3499 ERROR("%s networks are not supported for containers "
535e8859 3500 "not setup up by privileged users", net_type_name);
811ef482
CB
3501 return -1;
3502 }
3503
3504 netdev->ifindex = if_nametoindex(netdev->name);
3505 }
3506
3507 /* get the new ifindex in case of physical netdev */
3508 if (netdev->type == LXC_NET_PHYS) {
3509 netdev->ifindex = if_nametoindex(netdev->link);
3510 if (!netdev->ifindex) {
3511 ERROR("Failed to get ifindex for network device \"%s\"",
3512 netdev->link);
3513 return -1;
3514 }
3515 }
3516
3517 /* retrieve the name of the interface */
3518 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3519 ERROR("Failed get name for network device with ifindex %d",
3520 netdev->ifindex);
3521 return -1;
3522 }
3523
3524 /* Default: let the system to choose one interface name.
3525 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3526 * netlink will replace the format specifier with an appropriate index.
3527 */
de4855a8
CB
3528 if (netdev->name[0] == '\0') {
3529 if (netdev->type == LXC_NET_PHYS)
94b1cade 3530 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3531 else
94b1cade 3532 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3533 }
811ef482
CB
3534
3535 /* rename the interface name */
3536 if (strcmp(ifname, netdev->name) != 0) {
3537 err = lxc_netdev_rename_by_name(ifname, netdev->name);
3538 if (err) {
6d1400b5 3539 errno = -err;
3540 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
3541 ifname, netdev->name);
811ef482
CB
3542 return -1;
3543 }
3544 }
3545
3546 /* Re-read the name of the interface because its name has changed
3547 * and would be automatically allocated by the system
3548 */
3549 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3550 ERROR("Failed get name for network device with ifindex %d",
3551 netdev->ifindex);
3552 return -1;
3553 }
3554
790255cf
CB
3555 /* Now update the recorded name of the network device to reflect the
3556 * name of the network device in the child's network namespace. We will
3557 * later on send this information back to the parent.
3558 */
94b1cade 3559 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3560
811ef482
CB
3561 /* set a mac address */
3562 if (netdev->hwaddr) {
3563 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3564 ERROR("Failed to setup hw address for network device \"%s\"",
3565 current_ifname);
3566 return -1;
3567 }
3568 }
3569
3570 /* setup ipv4 addresses on the interface */
3571 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3572 ERROR("Failed to setup ip addresses for network device \"%s\"",
3573 ifname);
3574 return -1;
3575 }
3576
3577 /* setup ipv6 addresses on the interface */
3578 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3579 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
3580 ifname);
3581 return -1;
3582 }
3583
3584 /* set the network device up */
3585 if (netdev->flags & IFF_UP) {
811ef482
CB
3586 err = lxc_netdev_up(current_ifname);
3587 if (err) {
6d1400b5 3588 errno = -err;
3589 SYSERROR("Failed to set network device \"%s\" up",
3590 current_ifname);
811ef482
CB
3591 return -1;
3592 }
3593
3594 /* the network is up, make the loopback up too */
3595 err = lxc_netdev_up("lo");
3596 if (err) {
6d1400b5 3597 errno = -err;
3598 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3599 return -1;
3600 }
3601 }
3602
811ef482 3603 /* setup ipv4 gateway on the interface */
a2f9a670 3604 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3605 if (!(netdev->flags & IFF_UP)) {
3606 ERROR("Cannot add ipv4 gateway for network device "
3607 "\"%s\" when not bringing up the interface", ifname);
3608 return -1;
3609 }
3610
3611 if (lxc_list_empty(&netdev->ipv4)) {
3612 ERROR("Cannot add ipv4 gateway for network device "
3613 "\"%s\" when not assigning an address", ifname);
3614 return -1;
3615 }
3616
a2f9a670 3617 /* Setup device route if ipv4_gateway_dev is enabled */
3618 if (netdev->ipv4_gateway_dev) {
3619 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3620 if (err < 0) {
3621 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
6d1400b5 3622 ifname);
a2f9a670 3623 return minus_one_set_errno(-err);
811ef482 3624 }
a2f9a670 3625 } else {
009d6127 3626 /* Check the gateway address is valid */
3627 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3628 return minus_one_set_errno(errno);
3629
3630 /* Try adding a default route to the gateway address */
811ef482 3631 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3632 if (err < 0) {
3633 /* If adding the default route fails, this could be because the
3634 * gateway address is in a different subnet to the container's address.
3635 * To work around this, we try adding a static device route to the
3636 * gateway address first, and then try again.
3637 */
a2f9a670 3638 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3639 if (err < 0) {
a2f9a670 3640 errno = -err;
009d6127 3641 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
3642 bufinet4, ifname);
3643 return -1;
a2f9a670 3644 }
6d1400b5 3645
a2f9a670 3646 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3647 if (err < 0) {
a2f9a670 3648 errno = -err;
009d6127 3649 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
3650 bufinet4, ifname);
a2f9a670 3651 return -1;
811ef482 3652 }
811ef482
CB
3653 }
3654 }
3655 }
3656
3657 /* setup ipv6 gateway on the interface */
a2f9a670 3658 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482
CB
3659 if (!(netdev->flags & IFF_UP)) {
3660 ERROR("Cannot add ipv6 gateway for network device "
3661 "\"%s\" when not bringing up the interface", ifname);
3662 return -1;
3663 }
3664
3665 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
3666 ERROR("Cannot add ipv6 gateway for network device "
3667 "\"%s\" when not assigning an address", ifname);
3668 return -1;
3669 }
3670
a2f9a670 3671 /* Setup device route if ipv6_gateway_dev is enabled */
3672 if (netdev->ipv6_gateway_dev) {
3673 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3674 if (err < 0) {
3675 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
6d1400b5 3676 ifname);
a2f9a670 3677 return minus_one_set_errno(-err);
811ef482 3678 }
a2f9a670 3679 } else {
009d6127 3680 /* Check the gateway address is valid */
3681 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3682 return minus_one_set_errno(errno);
3683
3684 /* Try adding a default route to the gateway address */
811ef482 3685 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3686 if (err < 0) {
3687 /* If adding the default route fails, this could be because the
3688 * gateway address is in a different subnet to the container's address.
3689 * To work around this, we try adding a static device route to the
3690 * gateway address first, and then try again.
3691 */
a2f9a670 3692 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3693 if (err < 0) {
a2f9a670 3694 errno = -err;
009d6127 3695 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
3696 bufinet6, ifname);
3697 return -1;
a2f9a670 3698 }
6d1400b5 3699
a2f9a670 3700 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3701 if (err < 0) {
a2f9a670 3702 errno = -err;
009d6127 3703 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
3704 bufinet6, ifname);
a2f9a670 3705 return -1;
811ef482 3706 }
811ef482
CB
3707 }
3708 }
3709 }
3710
74c6e2b0 3711 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3712
3713 return 0;
3714}
3715
3716int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3717 struct lxc_list *network)
3718{
3719 struct lxc_list *iterator;
3720 struct lxc_netdev *netdev;
3721
811ef482
CB
3722 lxc_list_for_each(iterator, network) {
3723 netdev = iterator->elem;
3724
811ef482
CB
3725 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
3726 ERROR("failed to setup netdev");
3727 return -1;
3728 }
3729 }
3730
3731 if (!lxc_list_empty(network))
3732 INFO("network has been setup");
3733
3734 return 0;
3735}
7ab1ba02
CB
3736
3737int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3738{
3739 struct lxc_list *iterator;
3740 struct lxc_list *network = &handler->conf->network;
3741 int data_sock = handler->data_sock[0];
3742
d0fbc7ba 3743 if (handler->am_root)
7ab1ba02
CB
3744 return 0;
3745
3746 lxc_list_for_each(iterator, network) {
3747 int ret;
3748 struct lxc_netdev *netdev = iterator->elem;
3749
3750 if (netdev->type != LXC_NET_VETH)
3751 continue;
3752
7fbb15ec 3753 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3754 if (ret < 0)
7ab1ba02 3755 return -1;
7729f8e5 3756 TRACE("Sent network device name \"%s\" to child", netdev->name);
7ab1ba02
CB
3757 }
3758
3759 return 0;
3760}
3761
3762int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3763{
3764 struct lxc_list *iterator;
3765 struct lxc_list *network = &handler->conf->network;
3766 int data_sock = handler->data_sock[1];
3767
d0fbc7ba 3768 if (handler->am_root)
7ab1ba02
CB
3769 return 0;
3770
3771 lxc_list_for_each(iterator, network) {
3772 int ret;
3773 struct lxc_netdev *netdev = iterator->elem;
3774
3775 if (netdev->type != LXC_NET_VETH)
3776 continue;
3777
e3233f26 3778 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3779 if (ret < 0)
7ab1ba02 3780 return -1;
7729f8e5 3781 TRACE("Received network device name \"%s\" from parent", netdev->name);
7ab1ba02
CB
3782 }
3783
3784 return 0;
3785}
a1ae535a
CB
3786
3787int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3788{
3789 struct lxc_list *iterator, *network;
3790 int data_sock = handler->data_sock[0];
3791
3792 if (!handler->am_root)
3793 return 0;
3794
3795 network = &handler->conf->network;
3796 lxc_list_for_each(iterator, network) {
3797 int ret;
3798 struct lxc_netdev *netdev = iterator->elem;
3799
3800 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3801 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3802 if (ret < 0)
7729f8e5 3803 return -1;
a1ae535a
CB
3804
3805 /* Send network device ifindex in the child's namespace to
3806 * parent.
3807 */
7fbb15ec 3808 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3809 if (ret < 0)
7729f8e5 3810 return -1;
a1ae535a
CB
3811 }
3812
ad2ddfcd 3813 TRACE("Sent network device names and ifindices to parent");
a1ae535a 3814 return 0;
a1ae535a
CB
3815}
3816
3817int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3818{
3819 struct lxc_list *iterator, *network;
3820 int data_sock = handler->data_sock[1];
3821
3822 if (!handler->am_root)
3823 return 0;
3824
3825 network = &handler->conf->network;
3826 lxc_list_for_each(iterator, network) {
3827 int ret;
3828 struct lxc_netdev *netdev = iterator->elem;
3829
3830 /* Receive network device name in the child's namespace to
3831 * parent.
3832 */
e3233f26 3833 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3834 if (ret < 0)
7729f8e5 3835 return -1;
a1ae535a
CB
3836
3837 /* Receive network device ifindex in the child's namespace to
3838 * parent.
3839 */
e3233f26 3840 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3841 if (ret < 0)
7729f8e5 3842 return -1;
a1ae535a
CB
3843 }
3844
3845 return 0;
a1ae535a 3846}
bb84beda
CB
3847
3848void lxc_delete_network(struct lxc_handler *handler)
3849{
3850 bool bret;
3851
3852 if (handler->am_root)
3853 bret = lxc_delete_network_priv(handler);
3854 else
3855 bret = lxc_delete_network_unpriv(handler);
3856 if (!bret)
3857 DEBUG("Failed to delete network devices");
3858 else
3859 DEBUG("Deleted network devices");
3860}
1cd95214 3861
1cd95214
CB
3862int lxc_netns_set_nsid(int fd)
3863{
41a3300d 3864 int ret;
0ce60f0d
CB
3865 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3866 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3867 NLMSG_ALIGN(1024)];
1cd95214 3868 struct nl_handler nlh;
0ce60f0d
CB
3869 struct nlmsghdr *hdr;
3870 struct rtgenmsg *msg;
bfcedc7e 3871 int saved_errno;
9d036caa
CB
3872 const __s32 ns_id = -1;
3873 const __u32 netns_fd = fd;
1cd95214
CB
3874
3875 ret = netlink_open(&nlh, NETLINK_ROUTE);
3876 if (ret < 0)
41a3300d 3877 return -1;
1cd95214 3878
0ce60f0d 3879 memset(buf, 0, sizeof(buf));
6ce39620
CB
3880
3881#pragma GCC diagnostic push
3882#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3883 hdr = (struct nlmsghdr *)buf;
3884 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3885#pragma GCC diagnostic pop
1cd95214 3886
0ce60f0d
CB
3887 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3888 hdr->nlmsg_type = RTM_NEWNSID;
3889 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3890 hdr->nlmsg_pid = 0;
3891 hdr->nlmsg_seq = RTM_NEWNSID;
3892 msg->rtgen_family = AF_UNSPEC;
1cd95214 3893
9d036caa
CB
3894 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3895 if (ret < 0)
3896 goto on_error;
3897
3898 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3899 if (ret < 0)
3900 goto on_error;
1cd95214 3901
9fbbc427 3902 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3903
3904on_error:
bfcedc7e 3905 saved_errno = errno;
1cd95214 3906 netlink_close(&nlh);
bfcedc7e 3907 errno = saved_errno;
1cd95214 3908
9d036caa 3909 return ret;
1cd95214 3910}
938980ba
CB
3911
/* Index the attributes of a netlink message by type.
 *
 * @tb:  table with room for @max + 1 entries; it is cleared first
 * @max: highest attribute type to record
 * @rta: first attribute
 * @len: number of bytes available starting at @rta
 *
 * For each type in [0, @max] the first attribute of that type is stored in
 * @tb; later duplicates and types above @max are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	while (RTA_OK(rta, len)) {
		const unsigned short attr_type = rta->rta_type;

		if (attr_type <= max && tb[attr_type] == NULL)
			tb[attr_type] = rta;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
3931
/* Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The payload is copied out with memcpy() so that the const qualifier of
 * @rta is respected and no alignment is required of the payload. The caller
 * must make sure the attribute carries at least sizeof(__s32) bytes of data.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	/* RTA_LENGTH(0) is the offset of the payload within the attribute. */
	memcpy(&value, (const char *)rta + RTA_LENGTH(0), sizeof(value));

	return value;
}
3936
/* Given a pointer @r to a struct rtgenmsg, yield a pointer to the first
 * rtattr located right after the (aligned) rtgenmsg header. Only defined
 * here when no definition is already in scope.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3941
3942int lxc_netns_get_nsid(int fd)
3943{
3944 int ret;
3945 ssize_t len;
3946 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3947 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3948 NLMSG_ALIGN(1024)];
938980ba
CB
3949 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3950 struct nl_handler nlh;
3951 struct nlmsghdr *hdr;
3952 struct rtgenmsg *msg;
3953 int saved_errno;
3954 __u32 netns_fd = fd;
3955
3956 ret = netlink_open(&nlh, NETLINK_ROUTE);
3957 if (ret < 0)
3958 return -1;
3959
3960 memset(buf, 0, sizeof(buf));
6ce39620
CB
3961
3962#pragma GCC diagnostic push
3963#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3964 hdr = (struct nlmsghdr *)buf;
3965 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3966#pragma GCC diagnostic pop
938980ba
CB
3967
3968 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3969 hdr->nlmsg_type = RTM_GETNSID;
3970 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3971 hdr->nlmsg_pid = 0;
3972 hdr->nlmsg_seq = RTM_GETNSID;
3973 msg->rtgen_family = AF_UNSPEC;
3974
9d036caa
CB
3975 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3976 if (ret == 0)
3977 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 3978
938980ba
CB
3979 saved_errno = errno;
3980 netlink_close(&nlh);
3981 errno = saved_errno;
3982 if (ret < 0)
3983 return -1;
3984
9d036caa 3985 errno = EINVAL;
938980ba
CB
3986 msg = NLMSG_DATA(hdr);
3987 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
3988 if (len < 0)
3989 return -1;
3990
6ce39620
CB
3991#pragma GCC diagnostic push
3992#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3993 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
3994 if (tb[__LXC_NETNSA_NSID])
3995 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 3996#pragma GCC diagnostic pop
938980ba
CB
3997
3998 return -1;
3999}