]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: send names for all non-trivial network types
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
27#include <arpa/inet.h>
cb0dc11b
CB
28#include <ctype.h>
29#include <errno.h>
30#include <fcntl.h>
0ad19a3f 31#include <linux/netlink.h>
32#include <linux/rtnetlink.h>
33#include <linux/sockios.h>
cb0dc11b
CB
34#include <net/ethernet.h>
35#include <net/if.h>
36#include <net/if_arp.h>
37#include <netinet/in.h>
d38dd64a
CB
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
cb0dc11b
CB
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
d38dd64a
CB
47#include <time.h>
48#include <unistd.h>
f549edcc 49
d38dd64a 50#include "../include/netns_ifaddrs.h"
7ab1ba02 51#include "af_unix.h"
72d0e1cb 52#include "conf.h"
811ef482 53#include "config.h"
e3233f26 54#include "file_utils.h"
cb0dc11b 55#include "log.h"
8335fd40 56#include "macro.h"
95ea3d1f 57#include "memory_utils.h"
cb0dc11b
CB
58#include "network.h"
59#include "nl.h"
d7b58715 60#include "raw_syscalls.h"
59524108 61#include "syscall_wrappers.h"
0d204771 62#include "utils.h"
0ad19a3f 63
9de31d5a
CB
64#ifndef HAVE_STRLCPY
65#include "include/strlcpy.h"
66#endif
67
ac2cecc4 68lxc_log_define(network, lxc);
f8fee0e2 69
/* Signature shared by the per-type network setup and teardown callbacks. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/* Name of the loopback network device. */
static const char loop_device[] = "lo";
811ef482 72
b670016a 73static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 74{
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121}
122
/* Add an IPv4 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
127
/* Add an IPv6 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
132
/* Remove the IPv4 route to @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
137
/* Remove the IPv6 route to @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
142
d4a7da46 143static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144{
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160}
161
162static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163{
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179}
180
811ef482
CB
181static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182{
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
de4855a8 188 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
d34212ad
CB
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
811ef482
CB
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
6d1400b5 215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482
CB
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
6d1400b5 225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
227 goto out_delete;
228 }
229
8da62485
CB
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
811ef482
CB
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
de4855a8 253 } else if (netdev->link[0] != '\0') {
811ef482
CB
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
6d1400b5 268
811ef482 269 if (err) {
6d1400b5 270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
811ef482
CB
273 goto out_delete;
274 }
275 }
276
de4855a8 277 if (netdev->link[0] != '\0') {
811ef482
CB
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
6d1400b5 280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
811ef482
CB
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
6d1400b5 290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
292 goto out_delete;
293 }
294
d4a7da46 295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
811ef482 307 if (netdev->upscript) {
14a7b0f9
CB
308 char *argv[] = {
309 "veth",
310 netdev->link,
990b9ac3 311 veth1,
14a7b0f9
CB
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
811ef482
CB
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
811ef482
CB
330 return -1;
331}
332
333static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334{
8021de25 335 char peer[IFNAMSIZ];
811ef482 336 int err;
3bef7b7b 337 unsigned int mtu = 0;
811ef482 338
de4855a8 339 if (netdev->link[0] == '\0') {
811ef482
CB
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
8021de25
CB
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
346 return -1;
347
8021de25 348 if (!lxc_mkifname(peer))
811ef482
CB
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
6d1400b5 354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
966e9f1f 357 goto on_error;
811ef482
CB
358 }
359
a9704f05
CB
360 strlcpy(netdev->created_name, peer, IFNAMSIZ);
361
811ef482
CB
362 netdev->ifindex = if_nametoindex(peer);
363 if (!netdev->ifindex) {
364 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 365 goto on_error;
811ef482
CB
366 }
367
3bef7b7b
TP
368 if (netdev->mtu) {
369 err = lxc_safe_uint(netdev->mtu, &mtu);
370 if (err < 0) {
371 errno = -err;
372 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
373 goto on_error;
374 }
375
376 err = lxc_netdev_set_mtu(peer, mtu);
377 if (err < 0) {
378 errno = -err;
379 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
380 goto on_error;
381 }
382 }
383
811ef482 384 if (netdev->upscript) {
14a7b0f9
CB
385 char *argv[] = {
386 "macvlan",
387 netdev->link,
388 NULL,
389 };
390
391 err = run_script_argv(handler->name,
392 handler->conf->hooks_version, "net",
393 netdev->upscript, "up", argv);
394 if (err < 0)
966e9f1f 395 goto on_error;
811ef482
CB
396 }
397
398 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
399 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
400
401 return 0;
966e9f1f
CB
402
403on_error:
811ef482 404 lxc_netdev_delete_by_name(peer);
811ef482
CB
405 return -1;
406}
407
c9f52382 408static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
409{
410 int err, index, len;
411 struct ifinfomsg *ifi;
412 struct nl_handler nlh;
413 struct rtattr *nest, *nest2;
414 struct nlmsg *answer = NULL, *nlmsg = NULL;
415
416 len = strlen(master);
417 if (len == 1 || len >= IFNAMSIZ)
418 return minus_one_set_errno(EINVAL);
419
420 len = strlen(name);
421 if (len == 1 || len >= IFNAMSIZ)
422 return minus_one_set_errno(EINVAL);
423
424 index = if_nametoindex(master);
425 if (!index)
426 return minus_one_set_errno(EINVAL);
427
428 err = netlink_open(&nlh, NETLINK_ROUTE);
429 if (err)
430 return minus_one_set_errno(-err);
431
432 err = -ENOMEM;
433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
434 if (!nlmsg)
435 goto out;
436
437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
438 if (!answer)
439 goto out;
440
441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
443
444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
445 if (!ifi) {
446 goto out;
447 }
448 ifi->ifi_family = AF_UNSPEC;
449
450 err = -EPROTO;
451 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
452 if (!nest)
453 goto out;
454
455 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
456 goto out;
457
458 if (mode) {
459 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
460 if (!nest2)
461 goto out;
462
463 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
464 goto out;
465
466 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
467 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
468 */
469 if (isolation > 0) {
470 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
471 goto out;
472 }
473
474 nla_end_nested(nlmsg, nest2);
475 }
476
477 nla_end_nested(nlmsg, nest);
478
479 if (nla_put_u32(nlmsg, IFLA_LINK, index))
480 goto out;
481
482 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
483 goto out;
484
485 err = netlink_transaction(&nlh, nlmsg, answer);
486out:
487 netlink_close(&nlh);
488 nlmsg_free(answer);
489 nlmsg_free(nlmsg);
490 if (err < 0)
491 return minus_one_set_errno(-err);
492 return 0;
493}
494
495static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
496{
dd119206 497 char peer[IFNAMSIZ];
c9f52382 498 int err;
006e135e 499 unsigned int mtu = 0;
c9f52382 500
501 if (netdev->link[0] == '\0') {
502 ERROR("No link for ipvlan network device specified");
503 return -1;
504 }
505
dd119206
CB
506 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
507 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 508 return -1;
509
dd119206 510 if (!lxc_mkifname(peer))
c9f52382 511 return -1;
512
dd119206
CB
513 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
514 netdev->priv.ipvlan_attr.isolation);
c9f52382 515 if (err) {
dd119206
CB
516 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
517 peer, netdev->link);
c9f52382 518 goto on_error;
519 }
520
e7fdd504
CB
521 strlcpy(netdev->created_name, peer, IFNAMSIZ);
522
c9f52382 523 netdev->ifindex = if_nametoindex(peer);
524 if (!netdev->ifindex) {
525 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
526 goto on_error;
527 }
528
006e135e 529 if (netdev->mtu) {
530 err = lxc_safe_uint(netdev->mtu, &mtu);
531 if (err < 0) {
532 errno = -err;
dd119206
CB
533 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
534 netdev->mtu, peer);
006e135e 535 goto on_error;
536 }
537
538 err = lxc_netdev_set_mtu(peer, mtu);
539 if (err < 0) {
540 errno = -err;
dd119206
CB
541 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
542 netdev->mtu, peer);
006e135e 543 goto on_error;
544 }
545 }
546
c9f52382 547 if (netdev->upscript) {
548 char *argv[] = {
549 "ipvlan",
550 netdev->link,
551 NULL,
552 };
553
dd119206
CB
554 err = run_script_argv(handler->name, handler->conf->hooks_version,
555 "net", netdev->upscript, "up", argv);
c9f52382 556 if (err < 0)
557 goto on_error;
558 }
559
dd119206
CB
560 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
561 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 562
563 return 0;
564
565on_error:
566 lxc_netdev_delete_by_name(peer);
567 return -1;
568}
569
811ef482
CB
570static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
571{
572 char peer[IFNAMSIZ];
573 int err;
574 static uint16_t vlan_cntr = 0;
575 unsigned int mtu = 0;
576
de4855a8 577 if (netdev->link[0] == '\0') {
811ef482
CB
578 ERROR("No link for vlan network device specified");
579 return -1;
580 }
581
d4d68410
CB
582 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
583 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
584 if (err < 0 || (size_t)err >= sizeof(peer))
585 return -1;
586
587 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
588 if (err) {
6d1400b5 589 errno = -err;
590 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
591 peer, netdev->link);
811ef482
CB
592 return -1;
593 }
594
83530dba
CB
595 strlcpy(netdev->created_name, peer, IFNAMSIZ);
596
811ef482
CB
597 netdev->ifindex = if_nametoindex(peer);
598 if (!netdev->ifindex) {
599 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 600 goto on_error;
601 }
602
603 if (netdev->mtu) {
604 err = lxc_safe_uint(netdev->mtu, &mtu);
605 if (err < 0) {
606 errno = -err;
d4d68410
CB
607 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
608 netdev->mtu, peer);
3e2a7b08 609 goto on_error;
610 }
611
612 err = lxc_netdev_set_mtu(peer, mtu);
613 if (err) {
614 errno = -err;
d4d68410
CB
615 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
616 netdev->mtu, peer);
3e2a7b08 617 goto on_error;
618 }
811ef482
CB
619 }
620
3a73d9f1 621 if (netdev->upscript) {
622 char *argv[] = {
623 "vlan",
624 netdev->link,
625 NULL,
626 };
627
d4d68410
CB
628 err = run_script_argv(handler->name, handler->conf->hooks_version,
629 "net", netdev->upscript, "up", argv);
19abca58 630 if (err < 0) {
3e2a7b08 631 goto on_error;
19abca58 632 }
3a73d9f1 633 }
634
d4d68410
CB
635 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
636 netdev->ifindex);
811ef482
CB
637
638 return 0;
3e2a7b08 639
640on_error:
641 lxc_netdev_delete_by_name(peer);
642 return -1;
811ef482
CB
643}
644
645static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
646{
0b154989 647 int err, mtu_orig = 0;
3bef7b7b 648 unsigned int mtu = 0;
14a7b0f9 649
de4855a8 650 if (netdev->link[0] == '\0') {
811ef482
CB
651 ERROR("No link for physical interface specified");
652 return -1;
653 }
654
75b074ee
CB
655 /*
656 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
657 * network namespace because we need it to move the device from the
658 * host's network namespace to the container's network namespace later
659 * on.
660 * Note that netdev->link will contain the name of the physical network
661 * device in the host's namespace.
662 */
811ef482
CB
663 netdev->ifindex = if_nametoindex(netdev->link);
664 if (!netdev->ifindex) {
665 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
666 return -1;
667 }
668
61302ef7
CB
669 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
670
75b074ee
CB
671 /*
672 * Store the ifindex of the host's network device in the host's
790255cf
CB
673 * namespace.
674 */
675 netdev->priv.phys_attr.ifindex = netdev->ifindex;
676
75b074ee
CB
677 /*
678 * Get original device MTU setting and store for restoration after
679 * container shutdown.
680 */
0b154989
TP
681 mtu_orig = netdev_get_mtu(netdev->ifindex);
682 if (mtu_orig < 0) {
683 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
684 return minus_one_set_errno(-mtu_orig);
685 }
686
687 netdev->priv.phys_attr.mtu = mtu_orig;
688
3bef7b7b
TP
689 if (netdev->mtu) {
690 err = lxc_safe_uint(netdev->mtu, &mtu);
691 if (err < 0) {
692 errno = -err;
75b074ee
CB
693 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
694 netdev->mtu, netdev->link);
3bef7b7b
TP
695 return -1;
696 }
14a7b0f9 697
3bef7b7b
TP
698 err = lxc_netdev_set_mtu(netdev->link, mtu);
699 if (err < 0) {
700 errno = -err;
75b074ee
CB
701 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
702 netdev->mtu, netdev->link);
3bef7b7b
TP
703 return -1;
704 }
705 }
706
707 if (netdev->upscript) {
708 char *argv[] = {
709 "phys",
710 netdev->link,
711 NULL,
712 };
713
75b074ee
CB
714 err = run_script_argv(handler->name, handler->conf->hooks_version,
715 "net", netdev->upscript, "up", argv);
3bef7b7b
TP
716 if (err < 0) {
717 return -1;
718 }
719 }
720
75b074ee
CB
721 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
722 netdev->ifindex);
811ef482
CB
723
724 return 0;
725}
726
727static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
728{
14a7b0f9
CB
729 int ret;
730 char *argv[] = {
731 "empty",
732 NULL,
733 };
734
811ef482 735 netdev->ifindex = 0;
14a7b0f9
CB
736 if (!netdev->upscript)
737 return 0;
738
739 ret = run_script_argv(handler->name, handler->conf->hooks_version,
740 "net", netdev->upscript, "up", argv);
741 if (ret < 0)
742 return -1;
743
811ef482
CB
744 return 0;
745}
746
747static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
748{
749 netdev->ifindex = 0;
750 return 0;
751}
752
753static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
754 [LXC_NET_VETH] = instantiate_veth,
755 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 756 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
757 [LXC_NET_VLAN] = instantiate_vlan,
758 [LXC_NET_PHYS] = instantiate_phys,
759 [LXC_NET_EMPTY] = instantiate_empty,
760 [LXC_NET_NONE] = instantiate_none,
761};
762
763static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
764{
14a7b0f9
CB
765 int ret;
766 char *argv[] = {
767 "veth",
768 netdev->link,
769 NULL,
770 NULL,
771 };
772
773 if (!netdev->downscript)
774 return 0;
811ef482 775
de4855a8 776 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 777 argv[2] = netdev->priv.veth_attr.pair;
811ef482 778 else
14a7b0f9
CB
779 argv[2] = netdev->priv.veth_attr.veth1;
780
781 ret = run_script_argv(handler->name,
782 handler->conf->hooks_version, "net",
783 netdev->downscript, "down", argv);
784 if (ret < 0)
785 return -1;
811ef482 786
811ef482
CB
787 return 0;
788}
789
790static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
791{
14a7b0f9
CB
792 int ret;
793 char *argv[] = {
794 "macvlan",
795 netdev->link,
796 NULL,
797 };
798
799 if (!netdev->downscript)
800 return 0;
801
802 ret = run_script_argv(handler->name, handler->conf->hooks_version,
803 "net", netdev->downscript, "down", argv);
804 if (ret < 0)
805 return -1;
811ef482 806
811ef482
CB
807 return 0;
808}
809
c9f52382 810static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811{
812 int ret;
813 char *argv[] = {
814 "ipvlan",
815 netdev->link,
816 NULL,
817 };
818
819 if (!netdev->downscript)
820 return 0;
821
822 ret = run_script_argv(handler->name, handler->conf->hooks_version,
823 "net", netdev->downscript, "down", argv);
824 if (ret < 0)
825 return -1;
826
827 return 0;
828}
829
811ef482
CB
830static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
831{
3a73d9f1 832 int ret;
833 char *argv[] = {
834 "vlan",
835 netdev->link,
836 NULL,
837 };
838
839 if (!netdev->downscript)
840 return 0;
841
842 ret = run_script_argv(handler->name, handler->conf->hooks_version,
843 "net", netdev->downscript, "down", argv);
844 if (ret < 0)
845 return -1;
846
811ef482
CB
847 return 0;
848}
849
850static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
851{
14a7b0f9
CB
852 int ret;
853 char *argv[] = {
854 "phys",
855 netdev->link,
856 NULL,
857 };
858
859 if (!netdev->downscript)
860 return 0;
861
862 ret = run_script_argv(handler->name, handler->conf->hooks_version,
863 "net", netdev->downscript, "down", argv);
864 if (ret < 0)
865 return -1;
811ef482 866
811ef482
CB
867 return 0;
868}
869
870static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
871{
14a7b0f9
CB
872 int ret;
873 char *argv[] = {
874 "empty",
875 NULL,
876 };
877
878 if (!netdev->downscript)
879 return 0;
880
881 ret = run_script_argv(handler->name, handler->conf->hooks_version,
882 "net", netdev->downscript, "down", argv);
883 if (ret < 0)
884 return -1;
811ef482 885
811ef482
CB
886 return 0;
887}
888
/* Network type "none": nothing to tear down. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
893
894static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
895 [LXC_NET_VETH] = shutdown_veth,
896 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 897 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
898 [LXC_NET_VLAN] = shutdown_vlan,
899 [LXC_NET_PHYS] = shutdown_phys,
900 [LXC_NET_EMPTY] = shutdown_empty,
901 [LXC_NET_NONE] = shutdown_none,
902};
903
0037ab49
TP
904static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
905{
906 int err;
907 struct nl_handler nlh;
908 struct ifinfomsg *ifi;
909 struct nlmsg *nlmsg = NULL;
910
911 err = netlink_open(&nlh, NETLINK_ROUTE);
912 if (err)
913 return err;
914
915 err = -ENOMEM;
916 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
917 if (!nlmsg)
918 goto out;
919
920 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
921 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
922
923 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
924 if (!ifi)
925 goto out;
926 ifi->ifi_family = AF_UNSPEC;
927 ifi->ifi_index = ifindex;
928
929 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
930 goto out;
931
932 if (ifname != NULL) {
933 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
934 goto out;
935 }
936
937 err = netlink_transaction(&nlh, nlmsg, nlmsg);
938out:
939 netlink_close(&nlh);
940 nlmsg_free(nlmsg);
941 return err;
942}
943
ebc73a67 944int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 945{
ebc73a67 946 int err;
0ad19a3f 947 struct nl_handler nlh;
06f976ca 948 struct ifinfomsg *ifi;
ebc73a67 949 struct nlmsg *nlmsg = NULL;
0ad19a3f 950
3cfc0f3a
MN
951 err = netlink_open(&nlh, NETLINK_ROUTE);
952 if (err)
953 return err;
0ad19a3f 954
3cfc0f3a 955 err = -ENOMEM;
0ad19a3f 956 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
957 if (!nlmsg)
958 goto out;
959
ebc73a67 960 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
961 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
962
963 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
964 if (!ifi)
965 goto out;
06f976ca
SZ
966 ifi->ifi_family = AF_UNSPEC;
967 ifi->ifi_index = ifindex;
0ad19a3f 968
969 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
970 goto out;
971
8d357196
DY
972 if (ifname != NULL) {
973 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
974 goto out;
975 }
976
3cfc0f3a 977 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 978out:
979 netlink_close(&nlh);
980 nlmsg_free(nlmsg);
981 return err;
982}
983
ebc73a67
CB
984/* If we are asked to move a wireless interface, then we must actually move its
985 * phyN device. Detect that condition and return the physname here. The physname
986 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
987 */
988#define PHYSNAME "/sys/class/net/%s/phy80211/name"
ebc73a67 989static char *is_wlan(const char *ifname)
e5848d39 990{
b0293710 991 __do_free char *path = NULL;
ebc73a67 992 int i, ret;
e5848d39 993 long physlen;
ebc73a67 994 size_t len;
e5848d39 995 FILE *f;
ebc73a67 996 char *physname = NULL;
e5848d39 997
ebc73a67 998 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 999 path = must_realloc(NULL, len + 1);
e5848d39 1000 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1001 if (ret < 0 || (size_t)ret >= len)
e5848d39 1002 goto bad;
ebc73a67 1003
ebc73a67
CB
1004 f = fopen(path, "r");
1005 if (!f)
e5848d39 1006 goto bad;
ebc73a67 1007
1a0e70ac 1008 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1009 fseek(f, 0, SEEK_END);
1010 physlen = ftell(f);
1011 fseek(f, 0, SEEK_SET);
7d1cde93
SX
1012 if (physlen < 0) {
1013 fclose(f);
0382c0da 1014 goto bad;
7d1cde93 1015 }
ebc73a67
CB
1016
1017 physname = malloc(physlen + 1);
ee54ea9a 1018 if (!physname) {
acf47e1b 1019 fclose(f);
e5848d39 1020 goto bad;
ee54ea9a 1021 }
ebc73a67
CB
1022
1023 memset(physname, 0, physlen + 1);
e5848d39
SH
1024 ret = fread(physname, 1, physlen, f);
1025 fclose(f);
1026 if (ret < 0)
1027 goto bad;
1028
ebc73a67 1029 for (i = 0; i < physlen; i++) {
e5848d39
SH
1030 if (physname[i] == '\n')
1031 physname[i] = '\0';
ebc73a67 1032
e5848d39
SH
1033 if (physname[i] == '\0')
1034 break;
1035 }
1036
1037 return physname;
1038
1039bad:
f10fad2f 1040 free(physname);
e5848d39
SH
1041 return NULL;
1042}
1043
ebc73a67
CB
/*
 * Rename the network device @old to @new inside the network namespace of the
 * process @pid.
 *
 * The work is done in a forked child so the caller's own namespace is left
 * untouched: the child switches namespace, performs the rename and exits with
 * the rename's result as its exit status.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() for the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on: it must never return into the caller's code,
	 * otherwise two processes would continue the parent's work. Exit with
	 * a failure status instead when the namespace switch fails.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1061
ebc73a67
CB
/*
 * Move the wireless phy device @physname into the network namespace of @pid
 * by running "iw phy <physname> set netns <pid>" in a child process, then, if
 * @newname is not NULL, rename @ifname to @newname inside that namespace.
 *
 * @physname is freed before returning on every path.
 *
 * Returns -1 if "iw" is not found by on_path(), if fork() fails, or if
 * wait_for_pid() reports a non-zero result for the child. Otherwise returns 0
 * when no rename was requested, or the result of the rename.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	char *iw_path;
	pid_t child;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto done;
	free(iw_path);

	child = fork();
	if (child < 0)
		goto done;

	if (child == 0) {
		char pidstr[30];

		sprintf(pidstr, "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when execlp() failed. */
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child) != 0)
		goto done;

	ret = newname ? lxc_netdev_rename_by_name_in_netns(pid, ifname, newname)
		      : 0;

done:
	free(physname);
	return ret;
}
1101
/*
 * Move the network device called @ifname to the network namespace of @pid,
 * optionally renaming it to @newname.
 *
 * When is_wlan() returns a phy name for @ifname the move is delegated to
 * lxc_netdev_move_wlan(); otherwise the device is moved by its index.
 *
 * Returns -EINVAL if @ifname is NULL or does not resolve to an interface
 * index, otherwise the result of the delegated move.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *physname;
	int ifindex;

	if (ifname == NULL)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (ifindex == 0)
		return -EINVAL;

	physname = is_wlan(ifname);
	if (!physname)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	return lxc_netdev_move_wlan(physname, ifname, pid, newname);
}
1120
b84f58b9 1121int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1122{
b84f58b9 1123 int err;
ebc73a67
CB
1124 struct ifinfomsg *ifi;
1125 struct nl_handler nlh;
1126 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1127
3cfc0f3a
MN
1128 err = netlink_open(&nlh, NETLINK_ROUTE);
1129 if (err)
1130 return err;
0ad19a3f 1131
3cfc0f3a 1132 err = -ENOMEM;
0ad19a3f 1133 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1134 if (!nlmsg)
1135 goto out;
1136
06f976ca 1137 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1138 if (!answer)
1139 goto out;
1140
ebc73a67 1141 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1142 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1143
1144 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1145 if (!ifi)
1146 goto out;
06f976ca
SZ
1147 ifi->ifi_family = AF_UNSPEC;
1148 ifi->ifi_index = ifindex;
0ad19a3f 1149
3cfc0f3a 1150 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1151out:
1152 netlink_close(&nlh);
1153 nlmsg_free(answer);
1154 nlmsg_free(nlmsg);
1155 return err;
1156}
1157
b84f58b9
DL
/* Delete the interface called @name.
 *
 * Returns -EINVAL if the name cannot be resolved to an index, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	return ifindex ? lxc_netdev_delete_by_index(ifindex) : -EINVAL;
}
1168
1169int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1170{
ebc73a67 1171 int err, len;
06f976ca 1172 struct ifinfomsg *ifi;
ebc73a67
CB
1173 struct nl_handler nlh;
1174 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1175
3cfc0f3a
MN
1176 err = netlink_open(&nlh, NETLINK_ROUTE);
1177 if (err)
1178 return err;
b9a5bb58 1179
b84f58b9 1180 len = strlen(newname);
90d79629
CB
1181 if (len == 1 || len >= IFNAMSIZ) {
1182 err = -EINVAL;
b84f58b9 1183 goto out;
90d79629 1184 }
b84f58b9 1185
3cfc0f3a 1186 err = -ENOMEM;
b9a5bb58
DL
1187 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1188 if (!nlmsg)
1189 goto out;
1190
06f976ca 1191 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1192 if (!answer)
1193 goto out;
1194
ebc73a67 1195 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1196 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1197
1198 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1199 if (!ifi)
1200 goto out;
06f976ca
SZ
1201 ifi->ifi_family = AF_UNSPEC;
1202 ifi->ifi_index = ifindex;
b84f58b9
DL
1203
1204 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1205 goto out;
b9a5bb58 1206
3cfc0f3a 1207 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1208out:
1209 netlink_close(&nlh);
1210 nlmsg_free(answer);
1211 nlmsg_free(nlmsg);
1212 return err;
1213}
1214
b84f58b9
DL
1215int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1216{
1217 int len, index;
1218
1219 len = strlen(oldname);
dae3fdf6 1220 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1221 return -EINVAL;
1222
1223 index = if_nametoindex(oldname);
1224 if (!index)
1225 return -EINVAL;
1226
1227 return lxc_netdev_rename_by_index(index, newname);
1228}
1229
8befa924 1230int netdev_set_flag(const char *name, int flag)
0ad19a3f 1231{
ebc73a67 1232 int err, index, len;
06f976ca 1233 struct ifinfomsg *ifi;
ebc73a67
CB
1234 struct nl_handler nlh;
1235 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1236
3cfc0f3a
MN
1237 err = netlink_open(&nlh, NETLINK_ROUTE);
1238 if (err)
1239 return err;
0ad19a3f 1240
3cfc0f3a 1241 err = -EINVAL;
0ad19a3f 1242 len = strlen(name);
dae3fdf6 1243 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1244 goto out;
1245
3cfc0f3a 1246 err = -ENOMEM;
0ad19a3f 1247 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1248 if (!nlmsg)
1249 goto out;
1250
06f976ca 1251 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1252 if (!answer)
1253 goto out;
1254
3cfc0f3a 1255 err = -EINVAL;
0ad19a3f 1256 index = if_nametoindex(name);
1257 if (!index)
1258 goto out;
1259
ebc73a67 1260 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1261 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1262
1263 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1264 if (!ifi) {
1265 err = -ENOMEM;
1266 goto out;
1267 }
06f976ca
SZ
1268 ifi->ifi_family = AF_UNSPEC;
1269 ifi->ifi_index = index;
1270 ifi->ifi_change |= IFF_UP;
1271 ifi->ifi_flags |= flag;
0ad19a3f 1272
1273 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1274out:
1275 netlink_close(&nlh);
1276 nlmsg_free(nlmsg);
1277 nlmsg_free(answer);
1278 return err;
1279}
1280
ebc73a67 1281int netdev_get_flag(const char *name, int *flag)
efa1cf45 1282{
ebc73a67 1283 int err, index, len;
a4318300 1284 struct ifinfomsg *ifi;
ebc73a67
CB
1285 struct nl_handler nlh;
1286 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1287
1288 if (!name)
1289 return -EINVAL;
1290
1291 err = netlink_open(&nlh, NETLINK_ROUTE);
1292 if (err)
1293 return err;
1294
1295 err = -EINVAL;
1296 len = strlen(name);
1297 if (len == 1 || len >= IFNAMSIZ)
1298 goto out;
1299
1300 err = -ENOMEM;
1301 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1302 if (!nlmsg)
1303 goto out;
1304
06f976ca 1305 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1306 if (!answer)
1307 goto out;
1308
1309 err = -EINVAL;
1310 index = if_nametoindex(name);
1311 if (!index)
1312 goto out;
1313
06f976ca
SZ
1314 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1315 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1316
1317 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1318 if (!ifi) {
1319 err = -ENOMEM;
1320 goto out;
1321 }
06f976ca
SZ
1322 ifi->ifi_family = AF_UNSPEC;
1323 ifi->ifi_index = index;
efa1cf45
DY
1324
1325 err = netlink_transaction(&nlh, nlmsg, answer);
1326 if (err)
1327 goto out;
1328
06f976ca 1329 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1330
1331 *flag = ifi->ifi_flags;
1332out:
1333 netlink_close(&nlh);
1334 nlmsg_free(nlmsg);
1335 nlmsg_free(answer);
1336 return err;
1337}
1338
1339/*
1340 * \brief Check a interface is up or not.
1341 *
1342 * \param name: name for the interface.
1343 *
1344 * \return int.
1345 * 0 means interface is down.
1346 * 1 means interface is up.
1347 * Others means error happened, and ret-value is the error number.
1348 */
ebc73a67 1349int lxc_netdev_isup(const char *name)
efa1cf45 1350{
ebc73a67 1351 int err, flag;
efa1cf45
DY
1352
1353 err = netdev_get_flag(name, &flag);
1354 if (err)
ebc73a67
CB
1355 return err;
1356
efa1cf45
DY
1357 if (flag & IFF_UP)
1358 return 1;
ebc73a67 1359
efa1cf45 1360 return 0;
efa1cf45
DY
1361}
1362
0130df54
SH
1363int netdev_get_mtu(int ifindex)
1364{
ebc73a67 1365 int answer_len, err, res;
0130df54 1366 struct nl_handler nlh;
06f976ca 1367 struct ifinfomsg *ifi;
0130df54 1368 struct nlmsghdr *msg;
ebc73a67
CB
1369 int readmore = 0, recv_len = 0;
1370 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1371
1372 err = netlink_open(&nlh, NETLINK_ROUTE);
1373 if (err)
1374 return err;
1375
1376 err = -ENOMEM;
1377 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1378 if (!nlmsg)
1379 goto out;
1380
06f976ca 1381 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1382 if (!answer)
1383 goto out;
1384
1385 /* Save the answer buffer length, since it will be overwritten
1386 * on the first receive (and we might need to receive more than
ebc73a67
CB
1387 * once.
1388 */
06f976ca
SZ
1389 answer_len = answer->nlmsghdr->nlmsg_len;
1390
ebc73a67 1391 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1392 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1393
06f976ca 1394 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1395 if (!ifi)
1396 goto out;
06f976ca 1397 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1398
1399 /* Send the request for addresses, which returns all addresses
1400 * on all interfaces. */
1401 err = netlink_send(&nlh, nlmsg);
1402 if (err < 0)
1403 goto out;
1404
6ce39620
CB
1405#pragma GCC diagnostic push
1406#pragma GCC diagnostic ignored "-Wcast-align"
1407
0130df54
SH
1408 do {
1409 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1410 * overwritten by a previous receive.
1411 */
06f976ca 1412 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1413
1414 /* Get the (next) batch of reply messages */
1415 err = netlink_rcv(&nlh, answer);
1416 if (err < 0)
1417 goto out;
1418
1419 recv_len = err;
0130df54
SH
1420
1421 /* Satisfy the typing for the netlink macros */
06f976ca 1422 msg = answer->nlmsghdr;
0130df54
SH
1423
1424 while (NLMSG_OK(msg, recv_len)) {
1425
1426 /* Stop reading if we see an error message */
1427 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1428 struct nlmsgerr *errmsg =
1429 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1430 err = errmsg->error;
1431 goto out;
1432 }
1433
1434 /* Stop reading if we see a NLMSG_DONE message */
1435 if (msg->nlmsg_type == NLMSG_DONE) {
1436 readmore = 0;
1437 break;
1438 }
1439
06f976ca 1440 ifi = NLMSG_DATA(msg);
0130df54
SH
1441 if (ifi->ifi_index == ifindex) {
1442 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1443 int attr_len =
1444 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1445 res = 0;
ebc73a67
CB
1446 while (RTA_OK(rta, attr_len)) {
1447 /* Found a local address for the
1448 * requested interface, return it.
1449 */
0130df54 1450 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1451 memcpy(&res, RTA_DATA(rta),
1452 sizeof(int));
0130df54
SH
1453 err = res;
1454 goto out;
1455 }
1456 rta = RTA_NEXT(rta, attr_len);
1457 }
0130df54
SH
1458 }
1459
ebc73a67
CB
1460 /* Keep reading more data from the socket if the last
1461 * message had the NLF_F_MULTI flag set.
1462 */
0130df54
SH
1463 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1464
ebc73a67 1465 /* Look at the next message received in this buffer. */
0130df54
SH
1466 msg = NLMSG_NEXT(msg, recv_len);
1467 }
1468 } while (readmore);
1469
6ce39620
CB
1470#pragma GCC diagnostic pop
1471
ebc73a67 1472 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1473 err = -1;
1474
1475out:
1476 netlink_close(&nlh);
1477 nlmsg_free(answer);
1478 nlmsg_free(nlmsg);
1479 return err;
1480}
1481
d472214b 1482int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1483{
ebc73a67 1484 int err, index, len;
06f976ca 1485 struct ifinfomsg *ifi;
ebc73a67
CB
1486 struct nl_handler nlh;
1487 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1488
3cfc0f3a
MN
1489 err = netlink_open(&nlh, NETLINK_ROUTE);
1490 if (err)
1491 return err;
75d09f83 1492
3cfc0f3a 1493 err = -EINVAL;
75d09f83 1494 len = strlen(name);
dae3fdf6 1495 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1496 goto out;
1497
3cfc0f3a 1498 err = -ENOMEM;
75d09f83
DL
1499 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1500 if (!nlmsg)
1501 goto out;
1502
06f976ca 1503 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1504 if (!answer)
1505 goto out;
1506
3cfc0f3a 1507 err = -EINVAL;
75d09f83
DL
1508 index = if_nametoindex(name);
1509 if (!index)
1510 goto out;
1511
ebc73a67 1512 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1513 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1514
1515 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1516 if (!ifi) {
1517 err = -ENOMEM;
1518 goto out;
1519 }
06f976ca
SZ
1520 ifi->ifi_family = AF_UNSPEC;
1521 ifi->ifi_index = index;
75d09f83
DL
1522
1523 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1524 goto out;
1525
1526 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1527out:
1528 netlink_close(&nlh);
1529 nlmsg_free(nlmsg);
1530 nlmsg_free(answer);
1531 return err;
1532}
1533
d472214b 1534int lxc_netdev_up(const char *name)
0ad19a3f 1535{
d472214b 1536 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1537}
1538
/* Take interface @name down: calls netdev_set_flag() with no flags set and
 * returns its result.
 */
int lxc_netdev_down(const char *name)
{
	const int wanted = 0;

	return netdev_set_flag(name, wanted);
}
1543
497353b6 1544int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1545{
ebc73a67 1546 int err, len;
06f976ca 1547 struct ifinfomsg *ifi;
ebc73a67 1548 struct nl_handler nlh;
0ad19a3f 1549 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1550 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1551
3cfc0f3a
MN
1552 err = netlink_open(&nlh, NETLINK_ROUTE);
1553 if (err)
1554 return err;
0ad19a3f 1555
3cfc0f3a 1556 err = -EINVAL;
0ad19a3f 1557 len = strlen(name1);
dae3fdf6 1558 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1559 goto out;
1560
1561 len = strlen(name2);
dae3fdf6 1562 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1563 goto out;
1564
3cfc0f3a 1565 err = -ENOMEM;
0ad19a3f 1566 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1567 if (!nlmsg)
1568 goto out;
1569
06f976ca 1570 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1571 if (!answer)
1572 goto out;
1573
06f976ca 1574 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1575 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1576 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1577
1578 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1579 if (!ifi)
1580 goto out;
06f976ca 1581 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1582
3cfc0f3a 1583 err = -EINVAL;
79e68309 1584 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1585 if (!nest1)
1586 goto out;
1587
1588 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1589 goto out;
1590
1591 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1592 if (!nest2)
1593 goto out;
1594
1595 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1596 if (!nest3)
1597 goto out;
1598
06f976ca 1599 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1600 if (!ifi) {
1601 err = -ENOMEM;
06f976ca 1602 goto out;
25a9939b 1603 }
0ad19a3f 1604
1605 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1606 goto out;
1607
1608 nla_end_nested(nlmsg, nest3);
0ad19a3f 1609 nla_end_nested(nlmsg, nest2);
0ad19a3f 1610 nla_end_nested(nlmsg, nest1);
1611
1612 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1613 goto out;
1614
3cfc0f3a 1615 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1616out:
1617 netlink_close(&nlh);
1618 nlmsg_free(answer);
1619 nlmsg_free(nlmsg);
1620 return err;
1621}
1622
ebc73a67 1623/* TODO: merge with lxc_macvlan_create */
7c11d57a 1624int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1625{
ebc73a67 1626 int err, len, lindex;
06f976ca 1627 struct ifinfomsg *ifi;
ebc73a67 1628 struct nl_handler nlh;
26c39028 1629 struct rtattr *nest, *nest2;
ebc73a67 1630 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1631
3cfc0f3a
MN
1632 err = netlink_open(&nlh, NETLINK_ROUTE);
1633 if (err)
1634 return err;
26c39028 1635
3cfc0f3a 1636 err = -EINVAL;
26c39028 1637 len = strlen(master);
dae3fdf6 1638 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1639 goto err3;
1640
1641 len = strlen(name);
dae3fdf6 1642 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1643 goto err3;
1644
3cfc0f3a 1645 err = -ENOMEM;
26c39028
JHS
1646 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1647 if (!nlmsg)
1648 goto err3;
1649
06f976ca 1650 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1651 if (!answer)
1652 goto err2;
1653
3cfc0f3a 1654 err = -EINVAL;
26c39028
JHS
1655 lindex = if_nametoindex(master);
1656 if (!lindex)
1657 goto err1;
1658
06f976ca 1659 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1660 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1661 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1662
1663 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1664 if (!ifi) {
1665 err = -ENOMEM;
1666 goto err1;
1667 }
06f976ca 1668 ifi->ifi_family = AF_UNSPEC;
26c39028 1669
79e68309 1670 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1671 if (!nest)
1672 goto err1;
1673
1674 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1675 goto err1;
1676
1677 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1678 if (!nest2)
1679 goto err1;
e892973e 1680
26c39028
JHS
1681 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1682 goto err1;
e892973e 1683
26c39028 1684 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1685 nla_end_nested(nlmsg, nest);
1686
1687 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1688 goto err1;
1689
1690 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1691 goto err1;
1692
3cfc0f3a 1693 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1694err1:
1695 nlmsg_free(answer);
1696err2:
1697 nlmsg_free(nlmsg);
1698err3:
1699 netlink_close(&nlh);
1700 return err;
1701}
1702
e892973e 1703int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1704{
ebc73a67 1705 int err, index, len;
06f976ca 1706 struct ifinfomsg *ifi;
ebc73a67 1707 struct nl_handler nlh;
e892973e 1708 struct rtattr *nest, *nest2;
ebc73a67 1709 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1710
3cfc0f3a
MN
1711 err = netlink_open(&nlh, NETLINK_ROUTE);
1712 if (err)
1713 return err;
0ad19a3f 1714
3cfc0f3a 1715 err = -EINVAL;
0ad19a3f 1716 len = strlen(master);
dae3fdf6 1717 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1718 goto out;
1719
1720 len = strlen(name);
dae3fdf6 1721 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1722 goto out;
1723
3cfc0f3a 1724 err = -ENOMEM;
0ad19a3f 1725 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1726 if (!nlmsg)
1727 goto out;
1728
06f976ca 1729 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1730 if (!answer)
1731 goto out;
1732
3cfc0f3a 1733 err = -EINVAL;
0ad19a3f 1734 index = if_nametoindex(master);
1735 if (!index)
1736 goto out;
1737
06f976ca 1738 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1739 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1740 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1741
1742 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1743 if (!ifi) {
1744 err = -ENOMEM;
1745 goto out;
1746 }
06f976ca 1747 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1748
79e68309 1749 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1750 if (!nest)
1751 goto out;
1752
1753 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1754 goto out;
1755
e892973e
DL
1756 if (mode) {
1757 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1758 if (!nest2)
1759 goto out;
1760
1761 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1762 goto out;
1763
1764 nla_end_nested(nlmsg, nest2);
1765 }
1766
0ad19a3f 1767 nla_end_nested(nlmsg, nest);
1768
1769 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1770 goto out;
1771
1772 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1773 goto out;
1774
3cfc0f3a 1775 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1776out:
1777 netlink_close(&nlh);
1778 nlmsg_free(answer);
1779 nlmsg_free(nlmsg);
1780 return err;
1781}
1782
/* Write the string @value to the file at @path.
 *
 * Returns 0 on success or -errno if the file cannot be opened for writing
 * or lxc_write_nointr() reports a failure.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int fd, ret;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to change it. */
	ret = lxc_write_nointr(fd, value, strlen(value)) < 0 ? -errno : 0;

	close(fd);
	return ret;
}
1798
6509154d 1799static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1800{
1801 int ret;
1802 char path[PATH_MAX];
1803 char buf[1] = "";
1804
1805 if (family != AF_INET && family != AF_INET6)
1806 return minus_one_set_errno(EINVAL);
1807
1808 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1809 family == AF_INET ? "ipv4" : "ipv6", ifname,
1810 "forwarding");
1811 if (ret < 0 || (size_t)ret >= PATH_MAX)
1812 return minus_one_set_errno(E2BIG);
1813
1814 return lxc_read_file_expect(path, buf, 1, "1");
1815}
1816
0ad19a3f 1817static int neigh_proxy_set(const char *ifname, int family, int flag)
1818{
9ba8130c 1819 int ret;
419590da 1820 char path[PATH_MAX];
0ad19a3f 1821
1822 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1823 return -EINVAL;
0ad19a3f 1824
419590da 1825 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1826 family == AF_INET ? "ipv4" : "ipv6", ifname,
1827 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 1828 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 1829 return -E2BIG;
0ad19a3f 1830
ebc73a67 1831 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1832}
1833
6509154d 1834static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1835{
1836 int ret;
1837 char path[PATH_MAX];
1838 char buf[1] = "";
1839
1840 if (family != AF_INET && family != AF_INET6)
1841 return minus_one_set_errno(EINVAL);
1842
1843 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1844 family == AF_INET ? "ipv4" : "ipv6", ifname,
1845 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1846 if (ret < 0 || (size_t)ret >= PATH_MAX)
1847 return minus_one_set_errno(E2BIG);
1848
1849 return lxc_read_file_expect(path, buf, 1, "1");
1850}
1851
/* Enable the neighbour proxy setting of interface @name for @family; see
 * neigh_proxy_set() for the return value.
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1856
/* Disable the neighbour proxy setting of interface @name for @family; see
 * neigh_proxy_set() for the return value.
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
1861
/* Decode one hexadecimal digit. Returns its value (0-15), or -1 if @c is not
 * a hexadecimal digit.
 */
static int mac_hex_nibble(char c)
{
	/* <ctype.h> classifiers are only defined for values representable as
	 * unsigned char (or EOF), so a plain char must be cast first.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are written to
 * sa_data. Each octet is one or two hexadecimal digits; octets may be
 * separated by ':'.
 *
 * Parsing is deliberately lenient: it stops at the end of the string or
 * after ETH_ALEN octets, whichever comes first. A string with fewer than
 * ETH_ALEN octets is accepted and only the parsed octets are written, and
 * anything following the last octet is not examined.
 *
 * Returns 0 on success, -EINVAL if a character that is neither a hexadecimal
 * digit nor an accepted separator is found where an octet is expected.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		int high, low;
		unsigned val;
		char next;

		/* The first character of an octet must be a hex digit. */
		high = mac_hex_nibble(*macaddr++);
		if (high < 0)
			return -EINVAL;
		val = (unsigned)high;

		/* The second one is either another digit, or a separator/end
		 * of string, which makes this a single-digit octet.
		 */
		next = *macaddr;
		low = mac_hex_nibble(next);
		if (low >= 0)
			val = (val << 4) | (unsigned)low;
		else if (next != ':' && next != '\0')
			return -EINVAL;

		/* Never step past the terminating NUL byte. */
		if (next != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1906
ebc73a67
CB
1907static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1908 void *acast, int prefix)
0ad19a3f 1909{
ebc73a67 1910 int addrlen, err;
06f976ca 1911 struct ifaddrmsg *ifa;
ebc73a67
CB
1912 struct nl_handler nlh;
1913 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1914
ebc73a67
CB
1915 addrlen = family == AF_INET ? sizeof(struct in_addr)
1916 : sizeof(struct in6_addr);
4bf1968d 1917
3cfc0f3a
MN
1918 err = netlink_open(&nlh, NETLINK_ROUTE);
1919 if (err)
1920 return err;
0ad19a3f 1921
3cfc0f3a 1922 err = -ENOMEM;
0ad19a3f 1923 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1924 if (!nlmsg)
1925 goto out;
1926
06f976ca 1927 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1928 if (!answer)
1929 goto out;
1930
06f976ca 1931 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1932 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1933 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1934
1935 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1936 if (!ifa)
25a9939b 1937 goto out;
06f976ca
SZ
1938 ifa->ifa_prefixlen = prefix;
1939 ifa->ifa_index = ifindex;
1940 ifa->ifa_family = family;
1941 ifa->ifa_scope = 0;
acf47e1b 1942
3cfc0f3a 1943 err = -EINVAL;
4bf1968d 1944 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1945 goto out;
1946
4bf1968d 1947 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1948 goto out;
1949
d8948a52 1950 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1951 goto out;
1952
ebc73a67 1953 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1954 err = -EPROTONOSUPPORT;
79881dc6
DL
1955 if (family == AF_INET6 &&
1956 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1957 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1958 goto out;
0ad19a3f 1959
3cfc0f3a 1960 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1961out:
1962 netlink_close(&nlh);
1963 nlmsg_free(answer);
1964 nlmsg_free(nlmsg);
1965 return err;
1966}
1967
/* Add the IPv6 address @addr/@prefix to the interface with index @ifindex.
 *
 * @mcast is passed to ip_addr_add() in the broadcast slot and @acast in the
 * anycast slot; the result of ip_addr_add() is returned.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	void *bcast_slot = mcast;
	void *acast_slot = acast;

	return ip_addr_add(AF_INET6, ifindex, addr, bcast_slot, acast_slot,
			   prefix);
}
1974
ebc73a67
CB
/* Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex.
 *
 * No anycast address is passed; the result of ip_addr_add() is returned.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	void *no_acast = NULL;

	return ip_addr_add(AF_INET, ifindex, addr, bcast, no_acast, prefix);
}
1980
ebc73a67
CB
/* Find an IFA_LOCAL address (or an IFA_ADDRESS address if no IFA_LOCAL is
 * present) in the given RTM_NEWADDR message. Allocates memory for the
 * address and stores that pointer in *res (so res should be an in_addr** or
 * in6_addr**).
 */
6ce39620
CB
1985#pragma GCC diagnostic push
1986#pragma GCC diagnostic ignored "-Wcast-align"
1987
ebc73a67
CB
/* Return contract: 0 when the message was processed (including when it is
 * for a different address family or carries no usable attribute), -1 when
 * an address attribute has an unexpected length or malloc() fails. A buffer
 * is only allocated when *res is NULL on entry to the attribute; otherwise
 * the existing buffer is overwritten.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addrmsg = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int addrlen;

	if (addrmsg->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk the attributes attached to this message. */
	for (attr = IFA_RTA(addrmsg); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * length, but verify it before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An IFA_ADDRESS seen earlier is overwritten by a later
		 * IFA_LOCAL, so only allocate the first time.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred answer, stop looking. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2032
6ce39620
CB
2033#pragma GCC diagnostic pop
2034
19a26f82
MK
2035static int ip_addr_get(int family, int ifindex, void **res)
2036{
ebc73a67 2037 int answer_len, err;
06f976ca 2038 struct ifaddrmsg *ifa;
ebc73a67 2039 struct nl_handler nlh;
19a26f82 2040 struct nlmsghdr *msg;
ebc73a67
CB
2041 int readmore = 0, recv_len = 0;
2042 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2043
2044 err = netlink_open(&nlh, NETLINK_ROUTE);
2045 if (err)
2046 return err;
2047
2048 err = -ENOMEM;
2049 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2050 if (!nlmsg)
2051 goto out;
2052
06f976ca 2053 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2054 if (!answer)
2055 goto out;
2056
ebc73a67
CB
2057 /* Save the answer buffer length, since it will be overwritten on the
2058 * first receive (and we might need to receive more than once).
2059 */
06f976ca
SZ
2060 answer_len = answer->nlmsghdr->nlmsg_len;
2061
ebc73a67 2062 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2063 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2064
06f976ca 2065 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2066 if (!ifa)
2067 goto out;
06f976ca 2068 ifa->ifa_family = family;
19a26f82 2069
ebc73a67
CB
2070 /* Send the request for addresses, which returns all addresses on all
2071 * interfaces.
2072 */
19a26f82
MK
2073 err = netlink_send(&nlh, nlmsg);
2074 if (err < 0)
2075 goto out;
19a26f82 2076
6ce39620
CB
2077#pragma GCC diagnostic push
2078#pragma GCC diagnostic ignored "-Wcast-align"
2079
19a26f82
MK
2080 do {
2081 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2082 * overwritten by a previous receive.
2083 */
06f976ca 2084 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2085
ebc73a67 2086 /* Get the (next) batch of reply messages. */
19a26f82
MK
2087 err = netlink_rcv(&nlh, answer);
2088 if (err < 0)
2089 goto out;
2090
2091 recv_len = err;
2092 err = 0;
2093
ebc73a67 2094 /* Satisfy the typing for the netlink macros. */
06f976ca 2095 msg = answer->nlmsghdr;
19a26f82
MK
2096
2097 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2098 /* Stop reading if we see an error message. */
19a26f82 2099 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2100 struct nlmsgerr *errmsg =
2101 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2102 err = errmsg->error;
2103 goto out;
2104 }
2105
ebc73a67 2106 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2107 if (msg->nlmsg_type == NLMSG_DONE) {
2108 readmore = 0;
2109 break;
2110 }
2111
2112 if (msg->nlmsg_type != RTM_NEWADDR) {
2113 err = -1;
2114 goto out;
2115 }
2116
06f976ca
SZ
2117 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2118 if (ifa->ifa_index == ifindex) {
2119 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2120 err = -1;
2121 goto out;
2122 }
2123
ebc73a67 2124 /* Found a result, stop searching. */
19a26f82
MK
2125 if (*res)
2126 goto out;
2127 }
2128
ebc73a67
CB
2129 /* Keep reading more data from the socket if the last
2130 * message had the NLF_F_MULTI flag set.
2131 */
19a26f82
MK
2132 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2133
ebc73a67 2134 /* Look at the next message received in this buffer. */
19a26f82
MK
2135 msg = NLMSG_NEXT(msg, recv_len);
2136 }
2137 } while (readmore);
2138
6ce39620
CB
2139#pragma GCC diagnostic pop
2140
19a26f82 2141 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2142 * error.
2143 */
19a26f82
MK
2144 err = -1;
2145
2146out:
2147 netlink_close(&nlh);
2148 nlmsg_free(answer);
2149 nlmsg_free(nlmsg);
2150 return err;
2151}
2152
/* Fetch an IPv6 address of the interface with index @ifindex into *@res;
 * returns the result of ip_addr_get().
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2157
/* Fetch an IPv4 address of the interface with index @ifindex into *@res;
 * returns the result of ip_addr_get().
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2162
f8fee0e2
MK
2163static int ip_gateway_add(int family, int ifindex, void *gw)
2164{
ebc73a67 2165 int addrlen, err;
f8fee0e2 2166 struct nl_handler nlh;
06f976ca 2167 struct rtmsg *rt;
ebc73a67 2168 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2169
ebc73a67
CB
2170 addrlen = family == AF_INET ? sizeof(struct in_addr)
2171 : sizeof(struct in6_addr);
f8fee0e2
MK
2172
2173 err = netlink_open(&nlh, NETLINK_ROUTE);
2174 if (err)
2175 return err;
2176
2177 err = -ENOMEM;
2178 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2179 if (!nlmsg)
2180 goto out;
2181
06f976ca 2182 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2183 if (!answer)
2184 goto out;
2185
06f976ca 2186 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2187 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2188 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2189
2190 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2191 if (!rt)
2192 goto out;
06f976ca
SZ
2193 rt->rtm_family = family;
2194 rt->rtm_table = RT_TABLE_MAIN;
2195 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2196 rt->rtm_protocol = RTPROT_BOOT;
2197 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2198 /* "default" destination */
06f976ca 2199 rt->rtm_dst_len = 0;
f8fee0e2
MK
2200
2201 err = -EINVAL;
a2f9a670 2202
2203 /* If gateway address not supplied, then a device route will be created instead */
2204 if (gw != NULL) {
2205 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2206 goto out;
2207 }
f8fee0e2
MK
2208
2209 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2210 * addresses for the gateway.
2211 */
f8fee0e2
MK
2212 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2213 goto out;
2214
2215 err = netlink_transaction(&nlh, nlmsg, answer);
2216out:
2217 netlink_close(&nlh);
2218 nlmsg_free(answer);
2219 nlmsg_free(nlmsg);
2220 return err;
2221}
2222
/* Add an IPv4 default route via @gw through the interface with index
 * @ifindex; returns the result of ip_gateway_add().
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}
2227
/* Add an IPv6 default route via @gw through the interface with index
 * @ifindex; returns the result of ip_gateway_add().
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}

/* Decide whether @bridge is to be treated as an openvswitch bridge.
 *
 * The test is purely negative: the answer is true exactly when stat() on
 * /sys/class/net/<bridge>/bridge fails with ENOENT. It is false when the
 * path exists, when stat() fails for any other reason, or when the path does
 * not fit into the buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	char sysfs_path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(sysfs_path, sizeof(sysfs_path),
		       "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_path))
		return false;

	if (stat(sysfs_path, &st) < 0 && errno == ENOENT)
		return true;

	return false;
}
2249
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks through their void *data
 * parameter.
 */
struct ovs_veth_args {
	/* Bridge name passed to ovs-vsctl. */
	const char *bridge;
	/* Port (interface) name passed to ovs-vsctl. */
	const char *nic;
};
2254
cb0dc11b
CB
2255/* Called from a background thread - when nic goes away, remove it from the
2256 * bridge.
c43cbc04 2257 */
581c75e7 2258static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2259{
581c75e7 2260 struct ovs_veth_args *args = data;
cb0dc11b 2261
581c75e7
CB
2262 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2263 (char *)NULL);
2264 return -1;
c43cbc04
SH
2265}
2266
581c75e7 2267int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2268{
c43cbc04 2269 int ret;
419590da 2270 char cmd_output[PATH_MAX];
581c75e7 2271 struct ovs_veth_args args;
6ad22d06 2272
581c75e7
CB
2273 args.bridge = bridge;
2274 args.nic = nic;
2275 ret = run_command(cmd_output, sizeof(cmd_output),
2276 lxc_ovs_delete_port_exec, (void *)&args);
2277 if (ret < 0) {
2278 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2279 "%s", bridge, nic, cmd_output);
6ad22d06 2280 return -1;
581c75e7 2281 }
0d204771 2282
581c75e7
CB
2283 return 0;
2284}
ebc73a67 2285
581c75e7
CB
2286static int lxc_ovs_attach_bridge_exec(void *data)
2287{
2288 struct ovs_veth_args *args = data;
ebc73a67 2289
581c75e7
CB
2290 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2291 (char *)NULL);
2292 return -1;
2293}
ebc73a67 2294
581c75e7
CB
2295static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2296{
2297 int ret;
419590da 2298 char cmd_output[PATH_MAX];
581c75e7 2299 struct ovs_veth_args args;
ebc73a67 2300
581c75e7
CB
2301 args.bridge = bridge;
2302 args.nic = nic;
2303 ret = run_command(cmd_output, sizeof(cmd_output),
2304 lxc_ovs_attach_bridge_exec, (void *)&args);
2305 if (ret < 0) {
2306 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2307 bridge, nic, cmd_output);
2308 return -1;
c43cbc04 2309 }
0d204771 2310
581c75e7 2311 return 0;
0d204771 2312}
0d204771 2313
581c75e7 2314int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2315{
ebc73a67 2316 int err, fd, index;
9de31d5a 2317 size_t retlen;
0ad19a3f 2318 struct ifreq ifr;
2319
dae3fdf6 2320 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2321 return -EINVAL;
0ad19a3f 2322
2323 index = if_nametoindex(ifname);
2324 if (!index)
3cfc0f3a 2325 return -EINVAL;
0ad19a3f 2326
0d204771 2327 if (is_ovs_bridge(bridge))
581c75e7 2328 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2329
ad9429e5 2330 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2331 if (fd < 0)
3cfc0f3a 2332 return -errno;
0ad19a3f 2333
9de31d5a 2334 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2335 if (retlen >= IFNAMSIZ) {
2336 close(fd);
9de31d5a 2337 return -E2BIG;
42cc4083 2338 }
9de31d5a 2339
ebc73a67 2340 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2341 ifr.ifr_ifindex = index;
7d163508 2342 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2343 close(fd);
3cfc0f3a
MN
2344 if (err)
2345 err = -errno;
0ad19a3f 2346
2347 return err;
2348}
72d0e1cb 2349
ebc73a67 2350static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2351 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2352 [LXC_NET_VETH] = "veth",
2353 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2354 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2355 [LXC_NET_PHYS] = "phys",
b343592b
BP
2356 [LXC_NET_VLAN] = "vlan",
2357 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2358};
2359
2360const char *lxc_net_type_to_str(int type)
2361{
2362 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2363 return NULL;
ebc73a67 2364
72d0e1cb
SG
2365 return lxc_network_types[type];
2366}
8befa924 2367
ebc73a67 2368static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2369
966e9f1f 2370char *lxc_mkifname(char *template)
a0265685 2371{
2d7bf744 2372 int ret;
b1e44ed1 2373 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2374 char name[IFNAMSIZ];
2375 bool exists = false;
2376 size_t i = 0;
280cc35f 2377#ifdef HAVE_RAND_R
2378 unsigned int seed;
2379
2380 seed = randseed(false);
2381#else
2382
2383 (void)randseed(true);
2384#endif
a0265685 2385
535e8859
CB
2386 if (strlen(template) >= IFNAMSIZ)
2387 return NULL;
2388
ebc73a67 2389 /* Get all the network interfaces. */
b1e44ed1 2390 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2391 if (ret < 0) {
6d1400b5 2392 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2393 return NULL;
2394 }
a0265685 2395
ebc73a67 2396 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2397 for (;;) {
966e9f1f 2398 name[0] = '\0';
94b1cade 2399 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2400
966e9f1f 2401 exists = false;
280cc35f 2402
a0265685
SG
2403 for (i = 0; i < strlen(name); i++) {
2404 if (name[i] == 'X') {
2405#ifdef HAVE_RAND_R
8523344a 2406 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2407#else
8523344a 2408 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2409#endif
2410 }
2411 }
2412
2413 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2414 if (!strcmp(ifa->ifa_name, name)) {
2415 exists = true;
a0265685
SG
2416 break;
2417 }
2418 }
2419
966e9f1f 2420 if (!exists)
a0265685 2421 break;
a0265685
SG
2422 }
2423
b1e44ed1 2424 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2425 (void)strlcpy(template, name, strlen(template) + 1);
2426
2427 return template;
a0265685
SG
2428}
2429
8befa924
SH
2430int setup_private_host_hw_addr(char *veth1)
2431{
ebc73a67 2432 int err, sockfd;
8befa924 2433 struct ifreq ifr;
8befa924 2434
ad9429e5 2435 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2436 if (sockfd < 0)
2437 return -errno;
2438
ebc73a67 2439 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
87c6e5db
DJ
2440 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2441 close(sockfd);
ebc73a67 2442 return -E2BIG;
87c6e5db 2443 }
ebc73a67 2444
8befa924
SH
2445 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2446 if (err < 0) {
8befa924 2447 close(sockfd);
8befa924
SH
2448 return -errno;
2449 }
2450
2451 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2452 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 2453 close(sockfd);
8befa924
SH
2454 if (err < 0)
2455 return -errno;
2456
2457 return 0;
2458}
811ef482
CB
2459
2460int lxc_find_gateway_addresses(struct lxc_handler *handler)
2461{
2462 struct lxc_list *network = &handler->conf->network;
2463 struct lxc_list *iterator;
2464 struct lxc_netdev *netdev;
2465 int link_index;
2466
2467 lxc_list_for_each(iterator, network) {
2468 netdev = iterator->elem;
2469
2470 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2471 continue;
2472
2473 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2474 ERROR("Automatic gateway detection is only supported "
2475 "for veth and macvlan");
2476 return -1;
2477 }
2478
de4855a8 2479 if (netdev->link[0] == '\0') {
811ef482
CB
2480 ERROR("Automatic gateway detection needs a link interface");
2481 return -1;
2482 }
2483
2484 link_index = if_nametoindex(netdev->link);
2485 if (!link_index)
2486 return -EINVAL;
2487
2488 if (netdev->ipv4_gateway_auto) {
2489 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2490 ERROR("Failed to automatically find ipv4 gateway "
2491 "address from link interface \"%s\"", netdev->link);
2492 return -1;
2493 }
2494 }
2495
2496 if (netdev->ipv6_gateway_auto) {
2497 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2498 ERROR("Failed to automatically find ipv6 gateway "
2499 "address from link interface \"%s\"", netdev->link);
2500 return -1;
2501 }
2502 }
2503 }
2504
2505 return 0;
2506}
2507
2508#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2509static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2510 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2511{
2512 int ret;
2513 pid_t child;
2514 int bytes, pipefd[2];
2515 char *token, *saveptr = NULL;
095ead80 2516 char netdev_link[IFNAMSIZ];
419590da 2517 char buffer[PATH_MAX] = {0};
94b1cade 2518 size_t retlen;
811ef482
CB
2519
2520 if (netdev->type != LXC_NET_VETH) {
2521 ERROR("Network type %d not support for unprivileged use", netdev->type);
2522 return -1;
2523 }
2524
2525 ret = pipe(pipefd);
2526 if (ret < 0) {
2527 SYSERROR("Failed to create pipe");
2528 return -1;
2529 }
2530
2531 child = fork();
2532 if (child < 0) {
2533 SYSERROR("Failed to create new process");
2534 close(pipefd[0]);
2535 close(pipefd[1]);
2536 return -1;
2537 }
2538
2539 if (child == 0) {
8335fd40 2540 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2541
2542 close(pipefd[0]);
2543
2544 ret = dup2(pipefd[1], STDOUT_FILENO);
2545 if (ret >= 0)
2546 ret = dup2(pipefd[1], STDERR_FILENO);
2547 close(pipefd[1]);
2548 if (ret < 0) {
2549 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2550 _exit(EXIT_FAILURE);
811ef482
CB
2551 }
2552
de4855a8 2553 if (netdev->link[0] != '\0')
9de31d5a 2554 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2555 else
9de31d5a
CB
2556 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2557 if (retlen >= IFNAMSIZ) {
2558 SYSERROR("Invalid network device name");
2559 _exit(EXIT_FAILURE);
2560 }
811ef482 2561
8335fd40
CB
2562 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2563 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2564 _exit(EXIT_FAILURE);
8335fd40 2565 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2566
2567 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2568 lxcname, pidstr, netdev_link,
de4855a8
CB
2569 netdev->name[0] != '\0' ? netdev->name : "(null)");
2570 if (netdev->name[0] != '\0')
811ef482
CB
2571 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2572 lxcpath, lxcname, pidstr, "veth", netdev_link,
2573 netdev->name, (char *)NULL);
2574 else
2575 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2576 lxcpath, lxcname, pidstr, "veth", netdev_link,
2577 (char *)NULL);
2578 SYSERROR("Failed to execute lxc-user-nic");
78070056 2579 _exit(EXIT_FAILURE);
811ef482
CB
2580 }
2581
2582 /* close the write-end of the pipe */
2583 close(pipefd[1]);
2584
419590da 2585 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2586 if (bytes < 0) {
74c6e2b0 2587 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2588 close(pipefd[0]);
6b9f82a9
CB
2589 } else {
2590 buffer[bytes - 1] = '\0';
811ef482 2591 }
811ef482
CB
2592
2593 ret = wait_for_pid(child);
2594 close(pipefd[0]);
6b9f82a9 2595 if (ret != 0 || bytes < 0) {
811ef482
CB
2596 ERROR("lxc-user-nic failed to configure requested network: %s",
2597 buffer[0] != '\0' ? buffer : "(null)");
2598 return -1;
2599 }
2600 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2601
2602 /* netdev->name */
2603 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2604 if (!token) {
2605 ERROR("Failed to parse lxc-user-nic output");
811ef482 2606 return -1;
74c6e2b0 2607 }
811ef482 2608
e389f2af
CB
2609 /*
2610 * lxc-user-nic will take care of proper network device naming. So
2611 * netdev->name and netdev->created_name need to be identical to not
2612 * trigger another rename later on.
2613 */
2614 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2615 if (retlen < IFNAMSIZ)
2616 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2617 if (retlen >= IFNAMSIZ) {
2618 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2619 return -E2BIG;
2620 }
811ef482 2621
74c6e2b0 2622 /* netdev->ifindex */
811ef482 2623 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2624 if (!token) {
2625 ERROR("Failed to parse lxc-user-nic output");
811ef482 2626 return -1;
74c6e2b0 2627 }
811ef482 2628
74c6e2b0
CB
2629 ret = lxc_safe_int(token, &netdev->ifindex);
2630 if (ret < 0) {
6d1400b5 2631 errno = -ret;
2632 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2633 return -1;
2634 }
2635
74c6e2b0 2636 /* netdev->priv.veth_attr.veth1 */
811ef482 2637 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2638 if (!token) {
2639 ERROR("Failed to parse lxc-user-nic output");
811ef482 2640 return -1;
74c6e2b0 2641 }
811ef482 2642
94b1cade
DJ
2643 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2644 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2645 ERROR("Host side veth device name returned by lxc-user-nic is "
2646 "too long");
2647 return -E2BIG;
2648 }
74c6e2b0
CB
2649
2650 /* netdev->priv.veth_attr.ifindex */
2651 token = strtok_r(NULL, ":", &saveptr);
2652 if (!token) {
2653 ERROR("Failed to parse lxc-user-nic output");
2654 return -1;
2655 }
2656
2657 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2658 if (ret < 0) {
6d1400b5 2659 errno = -ret;
2660 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2661 return -1;
2662 }
2663
4d781681 2664 if (netdev->upscript) {
2665 char *argv[] = {
2666 "veth",
2667 netdev->link,
2668 netdev->priv.veth_attr.veth1,
2669 NULL,
2670 };
2671
e389f2af
CB
2672 ret = run_script_argv(lxcname, hooks_version, "net",
2673 netdev->upscript, "up", argv);
4d781681 2674 if (ret < 0)
2675 return -1;
2676 }
2677
811ef482
CB
2678 return 0;
2679}
2680
f0ecc19d 2681static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2682 struct lxc_netdev *netdev,
2683 const char *netns_path)
811ef482
CB
2684{
2685 int bytes, ret;
2686 pid_t child;
2687 int pipefd[2];
419590da 2688 char buffer[PATH_MAX] = {0};
811ef482
CB
2689
2690 if (netdev->type != LXC_NET_VETH) {
2691 ERROR("Network type %d not support for unprivileged use", netdev->type);
2692 return -1;
2693 }
2694
2695 ret = pipe(pipefd);
2696 if (ret < 0) {
2697 SYSERROR("Failed to create pipe");
2698 return -1;
2699 }
2700
2701 child = fork();
2702 if (child < 0) {
2703 SYSERROR("Failed to create new process");
2704 close(pipefd[0]);
2705 close(pipefd[1]);
2706 return -1;
2707 }
2708
2709 if (child == 0) {
8843fde4 2710 char *hostveth;
811ef482
CB
2711
2712 close(pipefd[0]);
2713
2714 ret = dup2(pipefd[1], STDOUT_FILENO);
2715 if (ret >= 0)
2716 ret = dup2(pipefd[1], STDERR_FILENO);
2717 close(pipefd[1]);
2718 if (ret < 0) {
2719 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2720 _exit(EXIT_FAILURE);
811ef482
CB
2721 }
2722
8843fde4
CB
2723 if (netdev->priv.veth_attr.pair[0] != '\0')
2724 hostveth = netdev->priv.veth_attr.pair;
2725 else
2726 hostveth = netdev->priv.veth_attr.veth1;
2727 if (hostveth[0] == '\0') {
74c6e2b0 2728 SYSERROR("Host side veth device name is missing");
a30b9023 2729 _exit(EXIT_FAILURE);
74c6e2b0
CB
2730 }
2731
de4855a8 2732 if (netdev->link[0] == '\0') {
811ef482 2733 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2734 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2735 _exit(EXIT_FAILURE);
74c6e2b0 2736 }
811ef482 2737
811ef482 2738 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2739 lxcname, netns_path, netdev->link, hostveth);
811ef482 2740 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2741 lxcname, netns_path, "veth", netdev->link, hostveth,
2742 (char *)NULL);
811ef482 2743 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2744 _exit(EXIT_FAILURE);
811ef482
CB
2745 }
2746
2747 close(pipefd[1]);
2748
419590da 2749 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2750 if (bytes < 0) {
2751 SYSERROR("Failed to read from pipe file descriptor.");
2752 close(pipefd[0]);
6b9f82a9
CB
2753 } else {
2754 buffer[bytes - 1] = '\0';
811ef482 2755 }
811ef482 2756
6b9f82a9
CB
2757 ret = wait_for_pid(child);
2758 close(pipefd[0]);
2759 if (ret != 0 || bytes < 0) {
811ef482
CB
2760 ERROR("lxc-user-nic failed to delete requested network: %s",
2761 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2762 return -1;
2763 }
2764
811ef482
CB
2765 return 0;
2766}
2767
1bd8d726
CB
2768bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2769{
2770 int ret;
2771 struct lxc_list *iterator;
2772 struct lxc_list *network = &handler->conf->network;
2773 /* strlen("/proc/") = 6
2774 * +
8335fd40 2775 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2776 * +
2777 * strlen("/fd/") = 4
2778 * +
8335fd40 2779 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2780 * +
2781 * \0
2782 */
8335fd40 2783 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2784
2785 *netns_path = '\0';
2786
28d9e29e 2787 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2788 DEBUG("Cannot not guarantee safe deletion of network devices. "
2789 "Manual cleanup maybe needed");
2790 return false;
2791 }
2792
2793 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2794 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2795 if (ret < 0 || ret >= sizeof(netns_path))
2796 return false;
2797
2798 lxc_list_for_each(iterator, network) {
2799 char *hostveth = NULL;
2800 struct lxc_netdev *netdev = iterator->elem;
2801
2802 /* We can only delete devices whose ifindex we have. If we don't
2803 * have the index it means that we didn't create it.
2804 */
2805 if (!netdev->ifindex)
2806 continue;
2807
2808 if (netdev->type == LXC_NET_PHYS) {
2809 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2810 netdev->link);
2811 if (ret < 0)
2812 WARN("Failed to rename interface with index %d "
2813 "to its initial name \"%s\"",
2814 netdev->ifindex, netdev->link);
2815 else
2816 TRACE("Renamed interface with index %d to its "
2817 "initial name \"%s\"",
2818 netdev->ifindex, netdev->link);
b3259dc6
TP
2819
2820 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2821 goto clear_ifindices;
1bd8d726
CB
2822 }
2823
2824 ret = netdev_deconf[netdev->type](handler, netdev);
2825 if (ret < 0)
2826 WARN("Failed to deconfigure network device");
2827
2828 if (netdev->type != LXC_NET_VETH)
66a7c406 2829 goto clear_ifindices;
1bd8d726 2830
c869be20 2831 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2832 goto clear_ifindices;
1bd8d726 2833
8843fde4
CB
2834 if (netdev->priv.veth_attr.pair[0] != '\0')
2835 hostveth = netdev->priv.veth_attr.pair;
2836 else
2837 hostveth = netdev->priv.veth_attr.veth1;
2838 if (hostveth[0] == '\0')
66a7c406 2839 goto clear_ifindices;
8843fde4 2840
1bd8d726
CB
2841 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2842 handler->name, netdev,
2843 netns_path);
2844 if (ret < 0) {
1bd8d726 2845 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2846 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2847 goto clear_ifindices;
1bd8d726
CB
2848 }
2849 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2850 netdev->link);
66a7c406
CB
2851
2852clear_ifindices:
ad2ddfcd 2853 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
2854 * have cached stale data which would cause it to fail on reboot
2855 * we're we don't re-read the on-disk config file.
2856 */
2857 netdev->ifindex = 0;
2858 if (netdev->type == LXC_NET_PHYS) {
2859 netdev->priv.phys_attr.ifindex = 0;
2860 } else if (netdev->type == LXC_NET_VETH) {
2861 netdev->priv.veth_attr.veth1[0] = '\0';
2862 netdev->priv.veth_attr.ifindex = 0;
2863 }
1bd8d726
CB
2864 }
2865
bb84beda 2866 return true;
1bd8d726
CB
2867}
2868
/* Argument bundle handed to the "ip neigh ... proxy" exec callbacks through
 * the opaque void pointer of run_command().
 */
struct ip_proxy_args {
	const char *ip;  /* textual IP address used as the proxy entry */
	const char *dev; /* device name given after "dev" */
};
2873
2874static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2875{
2876 struct ip_proxy_args *args = data;
2877
2878 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2879 return -1;
2880}
2881
2882static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2883{
2884 struct ip_proxy_args *args = data;
2885
2886 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2887 return -1;
2888}
2889
2890static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2891{
2892 int ret;
2893 char cmd_output[PATH_MAX];
2894 struct ip_proxy_args args = {
2895 .ip = ip,
2896 .dev = dev,
2897 };
2898
2899 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2900 if (ret < 0) {
2901 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2902 return -1;
2903 }
2904
2905 return 0;
2906}
2907
2908static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2909{
2910 int ret;
2911 char cmd_output[PATH_MAX];
2912 struct ip_proxy_args args = {
2913 .ip = ip,
2914 .dev = dev,
2915 };
2916
2917 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2918 if (ret < 0) {
2919 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2920 return -1;
2921 }
2922
2923 return 0;
2924}
2925
2926static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2927 struct lxc_list *cur, *next;
2928 struct lxc_inetdev *inet4dev;
2929 struct lxc_inet6dev *inet6dev;
2930 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2931 int err = 0;
2932 unsigned int lo_ifindex = 0;
6509154d 2933
2934 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2935 if (!lxc_list_empty(&netdev->ipv4)) {
2936 /* Check for net.ipv4.conf.[link].forwarding=1 */
2937 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2938 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2939 return minus_one_set_errno(EINVAL);
2940 }
2941 }
2942
2943 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2944 if (!lxc_list_empty(&netdev->ipv6)) {
2945 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2946 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2947 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2948 return minus_one_set_errno(EINVAL);
2949 }
2950
2951 /* Check for net.ipv6.conf.[link].forwarding=1 */
2952 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2953 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2954 return minus_one_set_errno(EINVAL);
2955 }
2956 }
2957
b670016a 2958 /* Perform IPVLAN specific checks. */
2959 if (netdev->type == LXC_NET_IPVLAN) {
2960 /* Check mode is l3s as other modes do not work with l2proxy. */
2961 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2962 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2963 return minus_one_set_errno(EINVAL);
2964 }
2965
2966 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 2967 lo_ifindex = if_nametoindex(loop_device);
b670016a 2968 if (lo_ifindex == 0) {
3ebffb98 2969 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 2970 return minus_one_set_errno(EINVAL);
2971 }
2972 }
2973
6509154d 2974 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2975 inet4dev = cur->elem;
2976 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2977 return minus_one_set_errno(-errno);
2978
2979 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2980 return minus_one_set_errno(EINVAL);
b670016a 2981
2982 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2983 if (netdev->type == LXC_NET_IPVLAN) {
2984 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2985 if (err < 0) {
3ebffb98 2986 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 2987 return minus_one_set_errno(-err);
2988 }
2989 }
6509154d 2990 }
2991
2992 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2993 inet6dev = cur->elem;
2994 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2995 return minus_one_set_errno(-errno);
2996
2997 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2998 return minus_one_set_errno(EINVAL);
b670016a 2999
3000 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3001 if (netdev->type == LXC_NET_IPVLAN) {
3002 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
3003 if (err < 0) {
3ebffb98 3004 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3005 return minus_one_set_errno(-err);
3006 }
3007 }
6509154d 3008 }
3009
3010 return 0;
3011}
3012
/* Remove the l2proxy configuration for a single IPv4 address.
 *
 * When @lo_ifindex is non-zero the /32 route to that interface is deleted;
 * when @link is a non-empty string the neighbour proxy entry on @link is
 * deleted. Both steps are attempted even if the first one fails.
 *
 * Returns 0 if everything succeeded, otherwise -1 with errno set to EINVAL.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char bufinet4[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* If a local-loopback ifindex supplied remove the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
	}

	/* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(bufinet4, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3041
/* Remove the l2proxy configuration for a single IPv6 address.
 *
 * When @lo_ifindex is non-zero the /128 route to that interface is deleted;
 * when @link is a non-empty string the neighbour proxy entry on @link is
 * deleted. Both steps are attempted even if the first one fails.
 *
 * Returns 0 if everything succeeded, otherwise -1 with errno set to EINVAL.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char bufinet6[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* If a local-loopback ifindex supplied remove the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
	}

	/* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(bufinet6, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3070
6509154d 3071static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3072 unsigned int lo_ifindex = 0;
3073 unsigned int errCount = 0;
6509154d 3074 struct lxc_list *cur, *next;
3075 struct lxc_inetdev *inet4dev;
3076 struct lxc_inet6dev *inet6dev;
6509154d 3077
b670016a 3078 /* Perform IPVLAN specific checks. */
3079 if (netdev->type == LXC_NET_IPVLAN) {
3080 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3081 lo_ifindex = if_nametoindex(loop_device);
b670016a 3082 if (lo_ifindex == 0) {
3083 errCount++;
3ebffb98 3084 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3085 }
b670016a 3086 }
6509154d 3087
b670016a 3088 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3089 inet4dev = cur->elem;
3090 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3091 errCount++;
6509154d 3092 }
3093
3094 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3095 inet6dev = cur->elem;
b670016a 3096 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3097 errCount++;
6509154d 3098 }
3099
b670016a 3100 if (errCount > 0)
6509154d 3101 return minus_one_set_errno(EINVAL);
3102
3103 return 0;
3104}
3105
e389f2af 3106static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3107{
811ef482
CB
3108 struct lxc_list *iterator;
3109 struct lxc_list *network = &handler->conf->network;
3110
811ef482
CB
3111 lxc_list_for_each(iterator, network) {
3112 struct lxc_netdev *netdev = iterator->elem;
3113
3114 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3115 ERROR("Invalid network configuration type %d", netdev->type);
3116 return -1;
3117 }
3118
6509154d 3119 /* Setup l2proxy entries if enabled and used with a link property */
3120 if (netdev->l2proxy && netdev->link[0] != '\0') {
3121 if (lxc_setup_l2proxy(netdev)) {
3122 ERROR("Failed to setup l2proxy");
3123 return -1;
3124 }
3125 }
3126
811ef482
CB
3127 if (netdev_conf[netdev->type](handler, netdev)) {
3128 ERROR("Failed to create network device");
3129 return -1;
3130 }
811ef482
CB
3131 }
3132
3133 return 0;
3134}
3135
e389f2af 3136int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3137{
e389f2af
CB
3138 pid_t pid = handler->pid;
3139 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3140 struct lxc_list *iterator;
3141
e0010464 3142 if (am_guest_unpriv())
74c6e2b0 3143 return 0;
811ef482
CB
3144
3145 lxc_list_for_each(iterator, network) {
e389f2af
CB
3146 int ret;
3147 char ifname[IFNAMSIZ];
811ef482
CB
3148 struct lxc_netdev *netdev = iterator->elem;
3149
811ef482
CB
3150 if (!netdev->ifindex)
3151 continue;
3152
3153 /* retrieve the name of the interface */
3154 if (!if_indextoname(netdev->ifindex, ifname)) {
3155 ERROR("No interface corresponding to ifindex \"%d\"",
3156 netdev->ifindex);
3157 return -1;
3158 }
3159
535e8859
CB
3160 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3161 if (ret) {
6d1400b5 3162 errno = -ret;
e389f2af
CB
3163 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3164 ifname, pid);
811ef482
CB
3165 return -1;
3166 }
3167
e389f2af
CB
3168 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3169
3170 DEBUG("Moved network device \"%s\" to network namespace of %d",
3171 netdev->created_name, pid);
811ef482
CB
3172 }
3173
3174 return 0;
3175}
3176
3c09b97c
CB
3177static int network_requires_advanced_setup(int type)
3178{
3179 if (type == LXC_NET_EMPTY)
3180 return false;
3181
3182 if (type == LXC_NET_NONE)
3183 return false;
3184
3185 return true;
3186}
3187
e389f2af 3188static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3189{
e389f2af
CB
3190 int hooks_version = handler->conf->hooks_version;
3191 const char *lxcname = handler->name;
3192 const char *lxcpath = handler->lxcpath;
3193 struct lxc_list *network = &handler->conf->network;
3194 pid_t pid = handler->pid;
74c6e2b0
CB
3195 struct lxc_list *iterator;
3196
74c6e2b0
CB
3197 lxc_list_for_each(iterator, network) {
3198 struct lxc_netdev *netdev = iterator->elem;
3199
3c09b97c 3200 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3201 continue;
3202
3203 if (netdev->type != LXC_NET_VETH) {
e389f2af 3204 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3205 lxc_net_type_to_str(netdev->type));
3206 return -1;
3207 }
3208
3209 if (netdev->mtu)
3210 INFO("mtu ignored due to insufficient privilege");
3211
e389f2af
CB
3212 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3213 pid, hooks_version))
74c6e2b0
CB
3214 return -1;
3215 }
3216
3217 return 0;
3218}
3219
1bd8d726 3220bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3221{
3222 int ret;
3223 struct lxc_list *iterator;
3224 struct lxc_list *network = &handler->conf->network;
1bd8d726 3225
811ef482
CB
3226 lxc_list_for_each(iterator, network) {
3227 char *hostveth = NULL;
3228 struct lxc_netdev *netdev = iterator->elem;
3229
3230 /* We can only delete devices whose ifindex we have. If we don't
3231 * have the index it means that we didn't create it.
3232 */
3233 if (!netdev->ifindex)
3234 continue;
3235
6509154d 3236 /* Delete l2proxy entries if enabled and used with a link property */
3237 if (netdev->l2proxy && netdev->link[0] != '\0') {
3238 if (lxc_delete_l2proxy(netdev))
3239 WARN("Failed to delete all l2proxy config");
3240 /* Don't return, let the network be cleaned up as normal. */
3241 }
3242
811ef482
CB
3243 if (netdev->type == LXC_NET_PHYS) {
3244 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3245 if (ret < 0)
3246 WARN("Failed to rename interface with index %d "
b809f232
CB
3247 "from \"%s\" to its initial name \"%s\"",
3248 netdev->ifindex, netdev->name, netdev->link);
0b154989 3249 else {
29589196
CB
3250 TRACE("Renamed interface with index %d from "
3251 "\"%s\" to its initial name \"%s\"",
3252 netdev->ifindex, netdev->name,
3253 netdev->link);
0b154989
TP
3254
3255 /* Restore original MTU */
3256 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3257 if (ret < 0) {
3258 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3259 netdev->link, netdev->priv.phys_attr.mtu);
3260 } else {
3261 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3262 netdev->link, netdev->priv.phys_attr.mtu);
3263 }
3264 }
b3259dc6
TP
3265
3266 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3267 goto clear_ifindices;
811ef482
CB
3268 }
3269
3270 ret = netdev_deconf[netdev->type](handler, netdev);
3271 if (ret < 0)
3272 WARN("Failed to deconfigure network device");
3273
3274 /* Recent kernels remove the virtual interfaces when the network
3275 * namespace is destroyed but in case we did not move the
3276 * interface to the network namespace, we have to destroy it.
3277 */
1bd8d726 3278 ret = lxc_netdev_delete_by_index(netdev->ifindex);
78ab281c
CB
3279 if (ret < 0) {
3280 if (errno != ENODEV) {
3281 WARN("Failed to remove interface \"%s\" with index %d",
3282 netdev->name[0] != '\0' ? netdev->name : "(null)",
3283 netdev->ifindex);
3284 goto clear_ifindices;
3285 }
3286 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
24548539
CB
3287 netdev->name[0] != '\0' ? netdev->name : "(null)",
3288 netdev->ifindex);
811ef482 3289 }
1bd8d726 3290 INFO("Removed interface \"%s\" with index %d",
52845118
CB
3291 netdev->name[0] != '\0' ? netdev->name : "(null)",
3292 netdev->ifindex);
811ef482
CB
3293
3294 if (netdev->type != LXC_NET_VETH)
66a7c406 3295 goto clear_ifindices;
811ef482 3296
811ef482
CB
3297 /* Explicitly delete host veth device to prevent lingering
3298 * devices. We had issues in LXD around this.
3299 */
de4855a8 3300 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3301 hostveth = netdev->priv.veth_attr.pair;
3302 else
3303 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3304 if (hostveth[0] == '\0')
66a7c406 3305 goto clear_ifindices;
811ef482
CB
3306
3307 ret = lxc_netdev_delete_by_name(hostveth);
3308 if (ret < 0) {
24548539
CB
3309 WARN("Failed to remove interface \"%s\" from \"%s\"",
3310 hostveth, netdev->link);
66a7c406 3311 goto clear_ifindices;
811ef482
CB
3312 }
3313 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3314
c869be20 3315 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3316 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3317 netdev->ifindex = 0;
3318 netdev->priv.veth_attr.ifindex = 0;
3319 goto clear_ifindices;
811ef482
CB
3320 }
3321
3322 /* Delete the openvswitch port. */
3323 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3324 if (ret < 0)
3325 WARN("Failed to remove port \"%s\" from openvswitch "
3326 "bridge \"%s\"", hostveth, netdev->link);
3327 else
3328 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3329 hostveth, netdev->link);
3330
66a7c406 3331clear_ifindices:
ad2ddfcd 3332 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3333 * have cached stale data which would cause it to fail on reboot
3334 * we're we don't re-read the on-disk config file.
3335 */
3336 netdev->ifindex = 0;
3337 if (netdev->type == LXC_NET_PHYS) {
3338 netdev->priv.phys_attr.ifindex = 0;
3339 } else if (netdev->type == LXC_NET_VETH) {
3340 netdev->priv.veth_attr.veth1[0] = '\0';
3341 netdev->priv.veth_attr.ifindex = 0;
3342 }
811ef482
CB
3343 }
3344
bb84beda 3345 return true;
811ef482
CB
3346}
3347
3348int lxc_requests_empty_network(struct lxc_handler *handler)
3349{
3350 struct lxc_list *network = &handler->conf->network;
3351 struct lxc_list *iterator;
3352 bool found_none = false, found_nic = false;
3353
3354 if (lxc_list_empty(network))
3355 return 0;
3356
3357 lxc_list_for_each(iterator, network) {
3358 struct lxc_netdev *netdev = iterator->elem;
3359
3360 if (netdev->type == LXC_NET_NONE)
3361 found_none = true;
3362 else
3363 found_nic = true;
3364 }
3365 if (found_none && !found_nic)
3366 return 1;
3367 return 0;
3368}
3369
3370/* try to move physical nics to the init netns */
b809f232 3371int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3372{
3373 int ret;
b809f232 3374 int oldfd;
811ef482 3375 char ifname[IFNAMSIZ];
b809f232 3376 struct lxc_list *iterator;
28d9e29e 3377 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3378 struct lxc_conf *conf = handler->conf;
811ef482 3379
b809f232
CB
3380 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3381 * the parent network namespace. We won't have this capability if we are
3382 * unprivileged.
3383 */
d0fbc7ba 3384 if (!handler->am_root)
b809f232 3385 return 0;
811ef482 3386
b809f232 3387 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3388
0037ab49 3389 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3390 if (oldfd < 0) {
3391 SYSERROR("Failed to preserve network namespace");
b809f232 3392 return -1;
811ef482
CB
3393 }
3394
b809f232 3395 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3396 if (ret < 0) {
3397 SYSERROR("Failed to enter network namespace");
3398 close(oldfd);
b809f232 3399 return -1;
811ef482
CB
3400 }
3401
b809f232
CB
3402 lxc_list_for_each(iterator, &conf->network) {
3403 struct lxc_netdev *netdev = iterator->elem;
811ef482 3404
b809f232
CB
3405 if (netdev->type != LXC_NET_PHYS)
3406 continue;
3407
3408 /* Retrieve the name of the interface in the container's network
3409 * namespace.
3410 */
3411 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3412 WARN("No interface corresponding to ifindex %d",
b809f232 3413 netdev->ifindex);
811ef482
CB
3414 continue;
3415 }
b809f232 3416
0037ab49 3417 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3418 if (ret < 0)
811ef482
CB
3419 WARN("Error moving network device \"%s\" back to "
3420 "network namespace", ifname);
b809f232
CB
3421 else
3422 TRACE("Moved network device \"%s\" back to network "
3423 "namespace", ifname);
811ef482 3424 }
811ef482 3425
b809f232 3426 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3427 close(oldfd);
b809f232
CB
3428 if (ret < 0) {
3429 SYSERROR("Failed to enter network namespace");
3430 return -1;
3431 }
3432
3433 return 0;
811ef482
CB
3434}
3435
3436static int setup_hw_addr(char *hwaddr, const char *ifname)
3437{
3438 struct sockaddr sockaddr;
3439 struct ifreq ifr;
6d1400b5 3440 int ret, fd;
811ef482
CB
3441
3442 ret = lxc_convert_mac(hwaddr, &sockaddr);
3443 if (ret) {
6d1400b5 3444 errno = -ret;
3445 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3446 return -1;
3447 }
3448
3449 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3450 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3451 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3452
ad9429e5 3453 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3454 if (fd < 0)
3455 return -1;
3456
3457 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3458 if (ret)
6d1400b5 3459 SYSERROR("Failed to perform ioctl");
3460
3461 close(fd);
811ef482
CB
3462
3463 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3464 ifr.ifr_name);
3465
3466 return ret;
3467}
3468
3469static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3470{
3471 struct lxc_list *iterator;
3472 int err;
3473
3474 lxc_list_for_each(iterator, ip) {
3475 struct lxc_inetdev *inetdev = iterator->elem;
3476
3477 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3478 &inetdev->bcast, inetdev->prefix);
3479 if (err) {
6d1400b5 3480 errno = -err;
3481 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3482 "with ifindex %d", ifindex);
811ef482
CB
3483 return -1;
3484 }
3485 }
3486
3487 return 0;
3488}
3489
3490static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3491{
3492 struct lxc_list *iterator;
3493 int err;
3494
3495 lxc_list_for_each(iterator, ip) {
3496 struct lxc_inet6dev *inet6dev = iterator->elem;
3497
3498 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3499 &inet6dev->mcast, &inet6dev->acast,
3500 inet6dev->prefix);
3501 if (err) {
6d1400b5 3502 errno = -err;
3503 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3504 "with ifindex %d", ifindex);
811ef482
CB
3505 return -1;
3506 }
3507 }
3508
3509 return 0;
3510}
3511
3512static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3513{
3514 char ifname[IFNAMSIZ];
3515 int err;
811ef482 3516 char *current_ifname = ifname;
009d6127 3517 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3518
3519 /* empty network namespace */
3520 if (!netdev->ifindex) {
3521 if (netdev->flags & IFF_UP) {
3522 err = lxc_netdev_up("lo");
3523 if (err) {
6d1400b5 3524 errno = -err;
3525 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3526 return -1;
3527 }
3528 }
3529
3530 if (netdev->type == LXC_NET_EMPTY)
3531 return 0;
3532
3533 if (netdev->type == LXC_NET_NONE)
3534 return 0;
3535
e389f2af
CB
3536 netdev->ifindex = if_nametoindex(netdev->created_name);
3537 if (!netdev->ifindex)
3538 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3c09b97c 3539 netdev->created_name ?: "(null)");
811ef482
CB
3540 }
3541
3542 /* get the new ifindex in case of physical netdev */
3543 if (netdev->type == LXC_NET_PHYS) {
3544 netdev->ifindex = if_nametoindex(netdev->link);
3545 if (!netdev->ifindex) {
3546 ERROR("Failed to get ifindex for network device \"%s\"",
3547 netdev->link);
3548 return -1;
3549 }
3550 }
3551
3552 /* retrieve the name of the interface */
3553 if (!if_indextoname(netdev->ifindex, current_ifname)) {
e389f2af
CB
3554 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3555 netdev->ifindex);
811ef482
CB
3556 return -1;
3557 }
3558
e389f2af 3559 /* Default: let the system choose an interface name.
811ef482
CB
3560 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3561 * netlink will replace the format specifier with an appropriate index.
3562 */
de4855a8
CB
3563 if (netdev->name[0] == '\0') {
3564 if (netdev->type == LXC_NET_PHYS)
94b1cade 3565 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3566 else
94b1cade 3567 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3568 }
811ef482
CB
3569
3570 /* rename the interface name */
e389f2af
CB
3571 if (strcmp(current_ifname, netdev->name) != 0) {
3572 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
811ef482 3573 if (err) {
6d1400b5 3574 errno = -err;
3575 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
e389f2af 3576 current_ifname, netdev->name);
811ef482
CB
3577 return -1;
3578 }
e389f2af
CB
3579
3580 TRACE("Renamed network device from \"%s\" to \"%s\"",
3581 current_ifname, netdev->name);
811ef482
CB
3582 }
3583
3584 /* Re-read the name of the interface because its name has changed
3585 * and would be automatically allocated by the system
3586 */
3587 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3588 ERROR("Failed get name for network device with ifindex %d",
3589 netdev->ifindex);
3590 return -1;
3591 }
3592
790255cf
CB
3593 /* Now update the recorded name of the network device to reflect the
3594 * name of the network device in the child's network namespace. We will
3595 * later on send this information back to the parent.
3596 */
94b1cade 3597 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3598
811ef482
CB
3599 /* set a mac address */
3600 if (netdev->hwaddr) {
3601 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3602 ERROR("Failed to setup hw address for network device \"%s\"",
3603 current_ifname);
3604 return -1;
3605 }
3606 }
3607
3608 /* setup ipv4 addresses on the interface */
3609 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3610 ERROR("Failed to setup ip addresses for network device \"%s\"",
e389f2af 3611 current_ifname);
811ef482
CB
3612 return -1;
3613 }
3614
3615 /* setup ipv6 addresses on the interface */
3616 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3617 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
e389f2af 3618 current_ifname);
811ef482
CB
3619 return -1;
3620 }
3621
3622 /* set the network device up */
3623 if (netdev->flags & IFF_UP) {
811ef482
CB
3624 err = lxc_netdev_up(current_ifname);
3625 if (err) {
6d1400b5 3626 errno = -err;
3627 SYSERROR("Failed to set network device \"%s\" up",
3628 current_ifname);
811ef482
CB
3629 return -1;
3630 }
3631
3632 /* the network is up, make the loopback up too */
3633 err = lxc_netdev_up("lo");
3634 if (err) {
6d1400b5 3635 errno = -err;
3636 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3637 return -1;
3638 }
3639 }
3640
811ef482 3641 /* setup ipv4 gateway on the interface */
a2f9a670 3642 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3643 if (!(netdev->flags & IFF_UP)) {
3644 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3645 "\"%s\" when not bringing up the interface", current_ifname);
811ef482
CB
3646 return -1;
3647 }
3648
3649 if (lxc_list_empty(&netdev->ipv4)) {
3650 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3651 "\"%s\" when not assigning an address", current_ifname);
811ef482
CB
3652 return -1;
3653 }
3654
a2f9a670 3655 /* Setup device route if ipv4_gateway_dev is enabled */
3656 if (netdev->ipv4_gateway_dev) {
3657 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3658 if (err < 0) {
3659 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
e389f2af 3660 current_ifname);
a2f9a670 3661 return minus_one_set_errno(-err);
811ef482 3662 }
a2f9a670 3663 } else {
009d6127 3664 /* Check the gateway address is valid */
3665 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3666 return minus_one_set_errno(errno);
3667
3668 /* Try adding a default route to the gateway address */
811ef482 3669 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3670 if (err < 0) {
3671 /* If adding the default route fails, this could be because the
3672 * gateway address is in a different subnet to the container's address.
3673 * To work around this, we try adding a static device route to the
3674 * gateway address first, and then try again.
3675 */
a2f9a670 3676 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3677 if (err < 0) {
a2f9a670 3678 errno = -err;
009d6127 3679 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
e389f2af 3680 bufinet4, current_ifname);
009d6127 3681 return -1;
a2f9a670 3682 }
6d1400b5 3683
a2f9a670 3684 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3685 if (err < 0) {
a2f9a670 3686 errno = -err;
009d6127 3687 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
e389f2af 3688 bufinet4, current_ifname);
a2f9a670 3689 return -1;
811ef482 3690 }
811ef482
CB
3691 }
3692 }
3693 }
3694
3695 /* setup ipv6 gateway on the interface */
a2f9a670 3696 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3697 if (!(netdev->flags & IFF_UP)) {
e389f2af
CB
3698 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3699 current_ifname);
811ef482
CB
3700 return -1;
3701 }
3702
3703 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af
CB
3704 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3705 current_ifname);
811ef482
CB
3706 return -1;
3707 }
3708
a2f9a670 3709 /* Setup device route if ipv6_gateway_dev is enabled */
3710 if (netdev->ipv6_gateway_dev) {
3711 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3712 if (err < 0) {
3713 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
e389f2af 3714 current_ifname);
a2f9a670 3715 return minus_one_set_errno(-err);
811ef482 3716 }
a2f9a670 3717 } else {
009d6127 3718 /* Check the gateway address is valid */
3719 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3720 return minus_one_set_errno(errno);
3721
3722 /* Try adding a default route to the gateway address */
811ef482 3723 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3724 if (err < 0) {
3725 /* If adding the default route fails, this could be because the
3726 * gateway address is in a different subnet to the container's address.
3727 * To work around this, we try adding a static device route to the
3728 * gateway address first, and then try again.
3729 */
a2f9a670 3730 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3731 if (err < 0) {
a2f9a670 3732 errno = -err;
009d6127 3733 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
e389f2af 3734 bufinet6, current_ifname);
009d6127 3735 return -1;
a2f9a670 3736 }
6d1400b5 3737
a2f9a670 3738 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3739 if (err < 0) {
a2f9a670 3740 errno = -err;
009d6127 3741 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
e389f2af 3742 bufinet6, current_ifname);
a2f9a670 3743 return -1;
811ef482 3744 }
811ef482
CB
3745 }
3746 }
3747 }
3748
74c6e2b0 3749 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3750
3751 return 0;
3752}
3753
3754int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3755 struct lxc_list *network)
3756{
3757 struct lxc_list *iterator;
811ef482 3758
811ef482 3759 lxc_list_for_each(iterator, network) {
e389f2af 3760 struct lxc_netdev *netdev = iterator->elem;
811ef482 3761
811ef482 3762 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
e389f2af 3763 ERROR("Failed to setup netdev");
811ef482
CB
3764 return -1;
3765 }
3766 }
3767
3768 if (!lxc_list_empty(network))
e389f2af 3769 INFO("Network has been setup");
811ef482
CB
3770
3771 return 0;
3772}
7ab1ba02 3773
3c09b97c 3774int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3775{
3776 struct lxc_list *iterator;
3777 struct lxc_list *network = &handler->conf->network;
3778 int data_sock = handler->data_sock[0];
3779
7ab1ba02
CB
3780 lxc_list_for_each(iterator, network) {
3781 int ret;
3782 struct lxc_netdev *netdev = iterator->elem;
3783
3c09b97c 3784 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3785 continue;
3786
7fbb15ec 3787 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3788 if (ret < 0)
7ab1ba02 3789 return -1;
e389f2af
CB
3790
3791 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3792 if (ret < 0)
3793 return -1;
3794
3795 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3796 }
3797
3798 return 0;
3799}
3800
3c09b97c 3801int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3802{
3803 struct lxc_list *iterator;
3804 struct lxc_list *network = &handler->conf->network;
3805 int data_sock = handler->data_sock[1];
3806
7ab1ba02
CB
3807 lxc_list_for_each(iterator, network) {
3808 int ret;
3809 struct lxc_netdev *netdev = iterator->elem;
3810
3c09b97c 3811 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3812 continue;
3813
e3233f26 3814 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3815 if (ret < 0)
7ab1ba02 3816 return -1;
e389f2af
CB
3817
3818 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3819 if (ret < 0)
3820 return -1;
3821 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3822 }
3823
3824 return 0;
3825}
a1ae535a
CB
3826
3827int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3828{
3829 struct lxc_list *iterator, *network;
3830 int data_sock = handler->data_sock[0];
3831
3832 if (!handler->am_root)
3833 return 0;
3834
3835 network = &handler->conf->network;
3836 lxc_list_for_each(iterator, network) {
3837 int ret;
3838 struct lxc_netdev *netdev = iterator->elem;
3839
3840 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3841 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3842 if (ret < 0)
7729f8e5 3843 return -1;
a1ae535a
CB
3844
3845 /* Send network device ifindex in the child's namespace to
3846 * parent.
3847 */
7fbb15ec 3848 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3849 if (ret < 0)
7729f8e5 3850 return -1;
a1ae535a
CB
3851 }
3852
e389f2af
CB
3853 if (!lxc_list_empty(network))
3854 TRACE("Sent network device names and ifindices to parent");
3855
a1ae535a 3856 return 0;
a1ae535a
CB
3857}
3858
3859int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3860{
3861 struct lxc_list *iterator, *network;
3862 int data_sock = handler->data_sock[1];
3863
3864 if (!handler->am_root)
3865 return 0;
3866
3867 network = &handler->conf->network;
3868 lxc_list_for_each(iterator, network) {
3869 int ret;
3870 struct lxc_netdev *netdev = iterator->elem;
3871
3872 /* Receive network device name in the child's namespace to
3873 * parent.
3874 */
e3233f26 3875 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3876 if (ret < 0)
7729f8e5 3877 return -1;
a1ae535a
CB
3878
3879 /* Receive network device ifindex in the child's namespace to
3880 * parent.
3881 */
e3233f26 3882 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3883 if (ret < 0)
7729f8e5 3884 return -1;
a1ae535a
CB
3885 }
3886
3887 return 0;
a1ae535a 3888}
bb84beda
CB
3889
3890void lxc_delete_network(struct lxc_handler *handler)
3891{
3892 bool bret;
3893
3894 if (handler->am_root)
3895 bret = lxc_delete_network_priv(handler);
3896 else
3897 bret = lxc_delete_network_unpriv(handler);
3898 if (!bret)
3899 DEBUG("Failed to delete network devices");
3900 else
3901 DEBUG("Deleted network devices");
3902}
1cd95214 3903
1cd95214
CB
3904int lxc_netns_set_nsid(int fd)
3905{
41a3300d 3906 int ret;
0ce60f0d
CB
3907 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3908 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3909 NLMSG_ALIGN(1024)];
1cd95214 3910 struct nl_handler nlh;
0ce60f0d
CB
3911 struct nlmsghdr *hdr;
3912 struct rtgenmsg *msg;
bfcedc7e 3913 int saved_errno;
9d036caa
CB
3914 const __s32 ns_id = -1;
3915 const __u32 netns_fd = fd;
1cd95214
CB
3916
3917 ret = netlink_open(&nlh, NETLINK_ROUTE);
3918 if (ret < 0)
41a3300d 3919 return -1;
1cd95214 3920
0ce60f0d 3921 memset(buf, 0, sizeof(buf));
6ce39620
CB
3922
3923#pragma GCC diagnostic push
3924#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3925 hdr = (struct nlmsghdr *)buf;
3926 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3927#pragma GCC diagnostic pop
1cd95214 3928
0ce60f0d
CB
3929 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3930 hdr->nlmsg_type = RTM_NEWNSID;
3931 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3932 hdr->nlmsg_pid = 0;
3933 hdr->nlmsg_seq = RTM_NEWNSID;
3934 msg->rtgen_family = AF_UNSPEC;
1cd95214 3935
9d036caa
CB
3936 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3937 if (ret < 0)
3938 goto on_error;
3939
3940 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3941 if (ret < 0)
3942 goto on_error;
1cd95214 3943
9fbbc427 3944 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3945
3946on_error:
bfcedc7e 3947 saved_errno = errno;
1cd95214 3948 netlink_close(&nlh);
bfcedc7e 3949 errno = saved_errno;
1cd95214 3950
9d036caa 3951 return ret;
1cd95214 3952}
938980ba
CB
3953
3954static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
3955{
3956
3957 memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
3958
3959 while (RTA_OK(rta, len)) {
3960 unsigned short type = rta->rta_type;
3961
3962 if ((type <= max) && (!tb[type]))
3963 tb[type] = rta;
3964
6ce39620
CB
3965#pragma GCC diagnostic push
3966#pragma GCC diagnostic ignored "-Wcast-align"
938980ba 3967 rta = RTA_NEXT(rta, len);
6ce39620 3968#pragma GCC diagnostic pop
938980ba
CB
3969 }
3970
3971 return 0;
3972}
3973
/* Return the payload of route attribute @rta interpreted as a signed 32 bit
 * integer. The value is copied out with memcpy() so the read neither depends
 * on the payload's alignment nor casts through an incompatible pointer type.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3978
3979#ifndef NETNS_RTA
3980#define NETNS_RTA(r) \
3981 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
3982#endif
3983
3984int lxc_netns_get_nsid(int fd)
3985{
3986 int ret;
3987 ssize_t len;
3988 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3989 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3990 NLMSG_ALIGN(1024)];
938980ba
CB
3991 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3992 struct nl_handler nlh;
3993 struct nlmsghdr *hdr;
3994 struct rtgenmsg *msg;
3995 int saved_errno;
3996 __u32 netns_fd = fd;
3997
3998 ret = netlink_open(&nlh, NETLINK_ROUTE);
3999 if (ret < 0)
4000 return -1;
4001
4002 memset(buf, 0, sizeof(buf));
6ce39620
CB
4003
4004#pragma GCC diagnostic push
4005#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4006 hdr = (struct nlmsghdr *)buf;
4007 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4008#pragma GCC diagnostic pop
938980ba
CB
4009
4010 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4011 hdr->nlmsg_type = RTM_GETNSID;
4012 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4013 hdr->nlmsg_pid = 0;
4014 hdr->nlmsg_seq = RTM_GETNSID;
4015 msg->rtgen_family = AF_UNSPEC;
4016
9d036caa
CB
4017 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4018 if (ret == 0)
4019 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 4020
938980ba
CB
4021 saved_errno = errno;
4022 netlink_close(&nlh);
4023 errno = saved_errno;
4024 if (ret < 0)
4025 return -1;
4026
9d036caa 4027 errno = EINVAL;
938980ba
CB
4028 msg = NLMSG_DATA(hdr);
4029 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4030 if (len < 0)
4031 return -1;
4032
6ce39620
CB
4033#pragma GCC diagnostic push
4034#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4035 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4036 if (tb[__LXC_NETNSA_NSID])
4037 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4038#pragma GCC diagnostic pop
938980ba
CB
4039
4040 return -1;
4041}
e389f2af
CB
4042
4043int lxc_create_network(struct lxc_handler *handler)
4044{
4045 int ret;
4046
4047 /*
4048 * Find gateway addresses from the link device, which is no longer
4049 * accessible inside the container. Do this before creating network
4050 * interfaces, since goto out_delete_net does not work before
4051 * lxc_clone.
4052 */
4053 ret = lxc_find_gateway_addresses(handler);
4054 if (ret) {
4055 ERROR("Failed to find gateway addresses");
4056 return -1;
4057 }
4058
4059 if (handler->am_root) {
4060 ret = lxc_create_network_priv(handler);
4061 if (ret)
4062 return -1;
4063
4064 return lxc_network_move_created_netdev_priv(handler);
4065 }
4066
4067 return lxc_create_network_unpriv(handler);
4068}