]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: simplify instantiate_ipvlan()
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
27#include <arpa/inet.h>
cb0dc11b
CB
28#include <ctype.h>
29#include <errno.h>
30#include <fcntl.h>
0ad19a3f 31#include <linux/netlink.h>
32#include <linux/rtnetlink.h>
33#include <linux/sockios.h>
cb0dc11b
CB
34#include <net/ethernet.h>
35#include <net/if.h>
36#include <net/if_arp.h>
37#include <netinet/in.h>
d38dd64a
CB
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
cb0dc11b
CB
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
d38dd64a
CB
47#include <time.h>
48#include <unistd.h>
f549edcc 49
d38dd64a 50#include "../include/netns_ifaddrs.h"
7ab1ba02 51#include "af_unix.h"
72d0e1cb 52#include "conf.h"
811ef482 53#include "config.h"
e3233f26 54#include "file_utils.h"
cb0dc11b 55#include "log.h"
8335fd40 56#include "macro.h"
95ea3d1f 57#include "memory_utils.h"
cb0dc11b
CB
58#include "network.h"
59#include "nl.h"
d7b58715 60#include "raw_syscalls.h"
59524108 61#include "syscall_wrappers.h"
0d204771 62#include "utils.h"
0ad19a3f 63
9de31d5a
CB
64#ifndef HAVE_STRLCPY
65#include "include/strlcpy.h"
66#endif
67
ac2cecc4 68lxc_log_define(network, lxc);
f8fee0e2 69
811ef482 70typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
3ebffb98 71static const char loop_device[] = "lo";
811ef482 72
b670016a 73static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 74{
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121}
122
/* Add an IPv4 route to dest/netmask via the device with index ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
127
/* Add an IPv6 route to dest/netmask via the device with index ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
132
/* Delete the IPv4 route to dest/netmask via the device with index ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
137
/* Delete the IPv6 route to dest/netmask via the device with index ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
142
d4a7da46 143static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144{
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160}
161
162static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163{
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179}
180
811ef482
CB
181static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182{
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
de4855a8 188 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
d34212ad
CB
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
811ef482
CB
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
6d1400b5 215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482
CB
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
6d1400b5 225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
227 goto out_delete;
228 }
229
8da62485
CB
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
811ef482
CB
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
de4855a8 253 } else if (netdev->link[0] != '\0') {
811ef482
CB
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
6d1400b5 268
811ef482 269 if (err) {
6d1400b5 270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
811ef482
CB
273 goto out_delete;
274 }
275 }
276
de4855a8 277 if (netdev->link[0] != '\0') {
811ef482
CB
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
6d1400b5 280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
811ef482
CB
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
6d1400b5 290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
292 goto out_delete;
293 }
294
d4a7da46 295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
811ef482 307 if (netdev->upscript) {
14a7b0f9
CB
308 char *argv[] = {
309 "veth",
310 netdev->link,
990b9ac3 311 veth1,
14a7b0f9
CB
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
811ef482
CB
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
811ef482
CB
330 return -1;
331}
332
333static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334{
8021de25 335 char peer[IFNAMSIZ];
811ef482 336 int err;
3bef7b7b 337 unsigned int mtu = 0;
811ef482 338
de4855a8 339 if (netdev->link[0] == '\0') {
811ef482
CB
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
8021de25
CB
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
346 return -1;
347
8021de25 348 if (!lxc_mkifname(peer))
811ef482
CB
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
6d1400b5 354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
966e9f1f 357 goto on_error;
811ef482
CB
358 }
359
a9704f05
CB
360 strlcpy(netdev->created_name, peer, IFNAMSIZ);
361
811ef482
CB
362 netdev->ifindex = if_nametoindex(peer);
363 if (!netdev->ifindex) {
364 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 365 goto on_error;
811ef482
CB
366 }
367
3bef7b7b
TP
368 if (netdev->mtu) {
369 err = lxc_safe_uint(netdev->mtu, &mtu);
370 if (err < 0) {
371 errno = -err;
372 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
373 goto on_error;
374 }
375
376 err = lxc_netdev_set_mtu(peer, mtu);
377 if (err < 0) {
378 errno = -err;
379 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
380 goto on_error;
381 }
382 }
383
811ef482 384 if (netdev->upscript) {
14a7b0f9
CB
385 char *argv[] = {
386 "macvlan",
387 netdev->link,
388 NULL,
389 };
390
391 err = run_script_argv(handler->name,
392 handler->conf->hooks_version, "net",
393 netdev->upscript, "up", argv);
394 if (err < 0)
966e9f1f 395 goto on_error;
811ef482
CB
396 }
397
398 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
399 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
400
401 return 0;
966e9f1f
CB
402
403on_error:
811ef482 404 lxc_netdev_delete_by_name(peer);
811ef482
CB
405 return -1;
406}
407
c9f52382 408static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
409{
410 int err, index, len;
411 struct ifinfomsg *ifi;
412 struct nl_handler nlh;
413 struct rtattr *nest, *nest2;
414 struct nlmsg *answer = NULL, *nlmsg = NULL;
415
416 len = strlen(master);
417 if (len == 1 || len >= IFNAMSIZ)
418 return minus_one_set_errno(EINVAL);
419
420 len = strlen(name);
421 if (len == 1 || len >= IFNAMSIZ)
422 return minus_one_set_errno(EINVAL);
423
424 index = if_nametoindex(master);
425 if (!index)
426 return minus_one_set_errno(EINVAL);
427
428 err = netlink_open(&nlh, NETLINK_ROUTE);
429 if (err)
430 return minus_one_set_errno(-err);
431
432 err = -ENOMEM;
433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
434 if (!nlmsg)
435 goto out;
436
437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
438 if (!answer)
439 goto out;
440
441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
443
444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
445 if (!ifi) {
446 goto out;
447 }
448 ifi->ifi_family = AF_UNSPEC;
449
450 err = -EPROTO;
451 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
452 if (!nest)
453 goto out;
454
455 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
456 goto out;
457
458 if (mode) {
459 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
460 if (!nest2)
461 goto out;
462
463 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
464 goto out;
465
466 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
467 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
468 */
469 if (isolation > 0) {
470 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
471 goto out;
472 }
473
474 nla_end_nested(nlmsg, nest2);
475 }
476
477 nla_end_nested(nlmsg, nest);
478
479 if (nla_put_u32(nlmsg, IFLA_LINK, index))
480 goto out;
481
482 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
483 goto out;
484
485 err = netlink_transaction(&nlh, nlmsg, answer);
486out:
487 netlink_close(&nlh);
488 nlmsg_free(answer);
489 nlmsg_free(nlmsg);
490 if (err < 0)
491 return minus_one_set_errno(-err);
492 return 0;
493}
494
495static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
496{
dd119206 497 char peer[IFNAMSIZ];
c9f52382 498 int err;
006e135e 499 unsigned int mtu = 0;
c9f52382 500
501 if (netdev->link[0] == '\0') {
502 ERROR("No link for ipvlan network device specified");
503 return -1;
504 }
505
dd119206
CB
506 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
507 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 508 return -1;
509
dd119206 510 if (!lxc_mkifname(peer))
c9f52382 511 return -1;
512
dd119206
CB
513 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
514 netdev->priv.ipvlan_attr.isolation);
c9f52382 515 if (err) {
dd119206
CB
516 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
517 peer, netdev->link);
c9f52382 518 goto on_error;
519 }
520
521 netdev->ifindex = if_nametoindex(peer);
522 if (!netdev->ifindex) {
523 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
524 goto on_error;
525 }
526
006e135e 527 if (netdev->mtu) {
528 err = lxc_safe_uint(netdev->mtu, &mtu);
529 if (err < 0) {
530 errno = -err;
dd119206
CB
531 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
532 netdev->mtu, peer);
006e135e 533 goto on_error;
534 }
535
536 err = lxc_netdev_set_mtu(peer, mtu);
537 if (err < 0) {
538 errno = -err;
dd119206
CB
539 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
540 netdev->mtu, peer);
006e135e 541 goto on_error;
542 }
543 }
544
c9f52382 545 if (netdev->upscript) {
546 char *argv[] = {
547 "ipvlan",
548 netdev->link,
549 NULL,
550 };
551
dd119206
CB
552 err = run_script_argv(handler->name, handler->conf->hooks_version,
553 "net", netdev->upscript, "up", argv);
c9f52382 554 if (err < 0)
555 goto on_error;
556 }
557
dd119206
CB
558 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
559 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 560
561 return 0;
562
563on_error:
564 lxc_netdev_delete_by_name(peer);
565 return -1;
566}
567
811ef482
CB
568static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
569{
570 char peer[IFNAMSIZ];
571 int err;
572 static uint16_t vlan_cntr = 0;
573 unsigned int mtu = 0;
574
de4855a8 575 if (netdev->link[0] == '\0') {
811ef482
CB
576 ERROR("No link for vlan network device specified");
577 return -1;
578 }
579
580 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
581 if (err < 0 || (size_t)err >= sizeof(peer))
582 return -1;
583
584 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
585 if (err) {
6d1400b5 586 errno = -err;
587 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
588 peer, netdev->link);
811ef482
CB
589 return -1;
590 }
591
592 netdev->ifindex = if_nametoindex(peer);
593 if (!netdev->ifindex) {
594 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 595 goto on_error;
596 }
597
598 if (netdev->mtu) {
599 err = lxc_safe_uint(netdev->mtu, &mtu);
600 if (err < 0) {
601 errno = -err;
602 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
603 goto on_error;
604 }
605
606 err = lxc_netdev_set_mtu(peer, mtu);
607 if (err) {
608 errno = -err;
609 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
610 goto on_error;
611 }
811ef482
CB
612 }
613
3a73d9f1 614 if (netdev->upscript) {
615 char *argv[] = {
616 "vlan",
617 netdev->link,
618 NULL,
619 };
620
621 err = run_script_argv(handler->name,
622 handler->conf->hooks_version, "net",
623 netdev->upscript, "up", argv);
19abca58 624 if (err < 0) {
3e2a7b08 625 goto on_error;
19abca58 626 }
3a73d9f1 627 }
628
3bef7b7b 629 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
811ef482 630 peer, netdev->ifindex);
811ef482
CB
631
632 return 0;
3e2a7b08 633
634on_error:
635 lxc_netdev_delete_by_name(peer);
636 return -1;
811ef482
CB
637}
638
639static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
640{
0b154989 641 int err, mtu_orig = 0;
3bef7b7b 642 unsigned int mtu = 0;
14a7b0f9 643
de4855a8 644 if (netdev->link[0] == '\0') {
811ef482
CB
645 ERROR("No link for physical interface specified");
646 return -1;
647 }
648
790255cf
CB
649 /* Note that we're retrieving the container's ifindex in the host's
650 * network namespace because we need it to move the device from the
651 * host's network namespace to the container's network namespace later
652 * on.
653 * Note that netdev->link will contain the name of the physical network
654 * device in the host's namespace.
655 */
811ef482
CB
656 netdev->ifindex = if_nametoindex(netdev->link);
657 if (!netdev->ifindex) {
658 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
659 return -1;
660 }
661
790255cf
CB
662 /* Store the ifindex of the host's network device in the host's
663 * namespace.
664 */
665 netdev->priv.phys_attr.ifindex = netdev->ifindex;
666
0b154989
TP
667 /* Get original device MTU setting and store for restoration after container shutdown. */
668 mtu_orig = netdev_get_mtu(netdev->ifindex);
669 if (mtu_orig < 0) {
670 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
671 return minus_one_set_errno(-mtu_orig);
672 }
673
674 netdev->priv.phys_attr.mtu = mtu_orig;
675
3bef7b7b
TP
676 if (netdev->mtu) {
677 err = lxc_safe_uint(netdev->mtu, &mtu);
678 if (err < 0) {
679 errno = -err;
680 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
681 return -1;
682 }
14a7b0f9 683
3bef7b7b
TP
684 err = lxc_netdev_set_mtu(netdev->link, mtu);
685 if (err < 0) {
686 errno = -err;
687 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
688 return -1;
689 }
690 }
691
692 if (netdev->upscript) {
693 char *argv[] = {
694 "phys",
695 netdev->link,
696 NULL,
697 };
698
699 err = run_script_argv(handler->name,
700 handler->conf->hooks_version, "net",
701 netdev->upscript, "up", argv);
702 if (err < 0) {
703 return -1;
704 }
705 }
706
707 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
811ef482
CB
708
709 return 0;
710}
711
712static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
713{
14a7b0f9
CB
714 int ret;
715 char *argv[] = {
716 "empty",
717 NULL,
718 };
719
811ef482 720 netdev->ifindex = 0;
14a7b0f9
CB
721 if (!netdev->upscript)
722 return 0;
723
724 ret = run_script_argv(handler->name, handler->conf->hooks_version,
725 "net", netdev->upscript, "up", argv);
726 if (ret < 0)
727 return -1;
728
811ef482
CB
729 return 0;
730}
731
732static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
733{
734 netdev->ifindex = 0;
735 return 0;
736}
737
738static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
739 [LXC_NET_VETH] = instantiate_veth,
740 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 741 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
742 [LXC_NET_VLAN] = instantiate_vlan,
743 [LXC_NET_PHYS] = instantiate_phys,
744 [LXC_NET_EMPTY] = instantiate_empty,
745 [LXC_NET_NONE] = instantiate_none,
746};
747
748static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
749{
14a7b0f9
CB
750 int ret;
751 char *argv[] = {
752 "veth",
753 netdev->link,
754 NULL,
755 NULL,
756 };
757
758 if (!netdev->downscript)
759 return 0;
811ef482 760
de4855a8 761 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 762 argv[2] = netdev->priv.veth_attr.pair;
811ef482 763 else
14a7b0f9
CB
764 argv[2] = netdev->priv.veth_attr.veth1;
765
766 ret = run_script_argv(handler->name,
767 handler->conf->hooks_version, "net",
768 netdev->downscript, "down", argv);
769 if (ret < 0)
770 return -1;
811ef482 771
811ef482
CB
772 return 0;
773}
774
775static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
776{
14a7b0f9
CB
777 int ret;
778 char *argv[] = {
779 "macvlan",
780 netdev->link,
781 NULL,
782 };
783
784 if (!netdev->downscript)
785 return 0;
786
787 ret = run_script_argv(handler->name, handler->conf->hooks_version,
788 "net", netdev->downscript, "down", argv);
789 if (ret < 0)
790 return -1;
811ef482 791
811ef482
CB
792 return 0;
793}
794
c9f52382 795static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
796{
797 int ret;
798 char *argv[] = {
799 "ipvlan",
800 netdev->link,
801 NULL,
802 };
803
804 if (!netdev->downscript)
805 return 0;
806
807 ret = run_script_argv(handler->name, handler->conf->hooks_version,
808 "net", netdev->downscript, "down", argv);
809 if (ret < 0)
810 return -1;
811
812 return 0;
813}
814
811ef482
CB
815static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
816{
3a73d9f1 817 int ret;
818 char *argv[] = {
819 "vlan",
820 netdev->link,
821 NULL,
822 };
823
824 if (!netdev->downscript)
825 return 0;
826
827 ret = run_script_argv(handler->name, handler->conf->hooks_version,
828 "net", netdev->downscript, "down", argv);
829 if (ret < 0)
830 return -1;
831
811ef482
CB
832 return 0;
833}
834
835static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
836{
14a7b0f9
CB
837 int ret;
838 char *argv[] = {
839 "phys",
840 netdev->link,
841 NULL,
842 };
843
844 if (!netdev->downscript)
845 return 0;
846
847 ret = run_script_argv(handler->name, handler->conf->hooks_version,
848 "net", netdev->downscript, "down", argv);
849 if (ret < 0)
850 return -1;
811ef482 851
811ef482
CB
852 return 0;
853}
854
855static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
856{
14a7b0f9
CB
857 int ret;
858 char *argv[] = {
859 "empty",
860 NULL,
861 };
862
863 if (!netdev->downscript)
864 return 0;
865
866 ret = run_script_argv(handler->name, handler->conf->hooks_version,
867 "net", netdev->downscript, "down", argv);
868 if (ret < 0)
869 return -1;
811ef482 870
811ef482
CB
871 return 0;
872}
873
/* "none" network type: nothing to tear down. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	const int success = 0;

	return success;
}
878
879static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
880 [LXC_NET_VETH] = shutdown_veth,
881 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 882 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
883 [LXC_NET_VLAN] = shutdown_vlan,
884 [LXC_NET_PHYS] = shutdown_phys,
885 [LXC_NET_EMPTY] = shutdown_empty,
886 [LXC_NET_NONE] = shutdown_none,
887};
888
0037ab49
TP
889static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
890{
891 int err;
892 struct nl_handler nlh;
893 struct ifinfomsg *ifi;
894 struct nlmsg *nlmsg = NULL;
895
896 err = netlink_open(&nlh, NETLINK_ROUTE);
897 if (err)
898 return err;
899
900 err = -ENOMEM;
901 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
902 if (!nlmsg)
903 goto out;
904
905 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
906 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
907
908 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
909 if (!ifi)
910 goto out;
911 ifi->ifi_family = AF_UNSPEC;
912 ifi->ifi_index = ifindex;
913
914 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
915 goto out;
916
917 if (ifname != NULL) {
918 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
919 goto out;
920 }
921
922 err = netlink_transaction(&nlh, nlmsg, nlmsg);
923out:
924 netlink_close(&nlh);
925 nlmsg_free(nlmsg);
926 return err;
927}
928
ebc73a67 929int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 930{
ebc73a67 931 int err;
0ad19a3f 932 struct nl_handler nlh;
06f976ca 933 struct ifinfomsg *ifi;
ebc73a67 934 struct nlmsg *nlmsg = NULL;
0ad19a3f 935
3cfc0f3a
MN
936 err = netlink_open(&nlh, NETLINK_ROUTE);
937 if (err)
938 return err;
0ad19a3f 939
3cfc0f3a 940 err = -ENOMEM;
0ad19a3f 941 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
942 if (!nlmsg)
943 goto out;
944
ebc73a67 945 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
946 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
947
948 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
949 if (!ifi)
950 goto out;
06f976ca
SZ
951 ifi->ifi_family = AF_UNSPEC;
952 ifi->ifi_index = ifindex;
0ad19a3f 953
954 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
955 goto out;
956
8d357196
DY
957 if (ifname != NULL) {
958 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
959 goto out;
960 }
961
3cfc0f3a 962 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 963out:
964 netlink_close(&nlh);
965 nlmsg_free(nlmsg);
966 return err;
967}
968
ebc73a67
CB
969/* If we are asked to move a wireless interface, then we must actually move its
970 * phyN device. Detect that condition and return the physname here. The physname
971 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
972 */
973#define PHYSNAME "/sys/class/net/%s/phy80211/name"
ebc73a67 974static char *is_wlan(const char *ifname)
e5848d39 975{
b0293710 976 __do_free char *path = NULL;
ebc73a67 977 int i, ret;
e5848d39 978 long physlen;
ebc73a67 979 size_t len;
e5848d39 980 FILE *f;
ebc73a67 981 char *physname = NULL;
e5848d39 982
ebc73a67 983 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 984 path = must_realloc(NULL, len + 1);
e5848d39 985 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 986 if (ret < 0 || (size_t)ret >= len)
e5848d39 987 goto bad;
ebc73a67 988
ebc73a67
CB
989 f = fopen(path, "r");
990 if (!f)
e5848d39 991 goto bad;
ebc73a67 992
1a0e70ac 993 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
994 fseek(f, 0, SEEK_END);
995 physlen = ftell(f);
996 fseek(f, 0, SEEK_SET);
7d1cde93
SX
997 if (physlen < 0) {
998 fclose(f);
0382c0da 999 goto bad;
7d1cde93 1000 }
ebc73a67
CB
1001
1002 physname = malloc(physlen + 1);
ee54ea9a 1003 if (!physname) {
acf47e1b 1004 fclose(f);
e5848d39 1005 goto bad;
ee54ea9a 1006 }
ebc73a67
CB
1007
1008 memset(physname, 0, physlen + 1);
e5848d39
SH
1009 ret = fread(physname, 1, physlen, f);
1010 fclose(f);
1011 if (ret < 0)
1012 goto bad;
1013
ebc73a67 1014 for (i = 0; i < physlen; i++) {
e5848d39
SH
1015 if (physname[i] == '\n')
1016 physname[i] = '\0';
ebc73a67 1017
e5848d39
SH
1018 if (physname[i] == '\0')
1019 break;
1020 }
1021
1022 return physname;
1023
1024bad:
f10fad2f 1025 free(physname);
e5848d39
SH
1026 return NULL;
1027}
1028
ebc73a67
CB
/*
 * Rename the network device old to new inside the network namespace of the
 * process pid. The rename runs in a forked child that first switches to that
 * namespace.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child from here on: it must never return into the caller's code,
	 * otherwise two processes would continue running the parent's logic.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1046
ebc73a67
CB
/*
 * Move the wireless phy physname into the network namespace of pid by
 * running "iw phy <physname> set netns <pid>" in a child process, then, if
 * newname is given, rename ifname to newname inside that namespace.
 *
 * physname is always freed before returning. Returns 0 on success, -1 when
 * "iw" is not found, fork() fails or the child fails, otherwise the result
 * of the rename.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	char *iw_path;
	pid_t child;
	int ret = -1;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto out_free;
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out_free;

	if (child == 0) {
		char pidstr[30];

		sprintf(pidstr, "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when execlp() failed. */
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto out_free;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out_free:
	free(physname);
	return ret;
}
1086
/*
 * Move the device ifname to the network namespace of pid, optionally
 * renaming it to newname. Devices for which is_wlan() returns a phy name are
 * moved through lxc_netdev_move_wlan(), all others by ifindex.
 *
 * Returns -EINVAL when ifname is NULL or unknown, otherwise the result of the
 * selected move function.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *wlan_phy;
	int ifindex;

	if (!ifname)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (!ifindex)
		return -EINVAL;

	wlan_phy = is_wlan(ifname);
	if (!wlan_phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	return lxc_netdev_move_wlan(wlan_phy, ifname, pid, newname);
}
1105
b84f58b9 1106int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1107{
b84f58b9 1108 int err;
ebc73a67
CB
1109 struct ifinfomsg *ifi;
1110 struct nl_handler nlh;
1111 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1112
3cfc0f3a
MN
1113 err = netlink_open(&nlh, NETLINK_ROUTE);
1114 if (err)
1115 return err;
0ad19a3f 1116
3cfc0f3a 1117 err = -ENOMEM;
0ad19a3f 1118 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1119 if (!nlmsg)
1120 goto out;
1121
06f976ca 1122 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1123 if (!answer)
1124 goto out;
1125
ebc73a67 1126 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1127 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1128
1129 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1130 if (!ifi)
1131 goto out;
06f976ca
SZ
1132 ifi->ifi_family = AF_UNSPEC;
1133 ifi->ifi_index = ifindex;
0ad19a3f 1134
3cfc0f3a 1135 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1136out:
1137 netlink_close(&nlh);
1138 nlmsg_free(answer);
1139 nlmsg_free(nlmsg);
1140 return err;
1141}
1142
b84f58b9
DL
/*
 * Delete the network device called name.
 *
 * Returns -EINVAL when if_nametoindex() finds no such device, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	return ifindex ? lxc_netdev_delete_by_index(ifindex) : -EINVAL;
}
1153
1154int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1155{
ebc73a67 1156 int err, len;
06f976ca 1157 struct ifinfomsg *ifi;
ebc73a67
CB
1158 struct nl_handler nlh;
1159 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1160
3cfc0f3a
MN
1161 err = netlink_open(&nlh, NETLINK_ROUTE);
1162 if (err)
1163 return err;
b9a5bb58 1164
b84f58b9 1165 len = strlen(newname);
90d79629
CB
1166 if (len == 1 || len >= IFNAMSIZ) {
1167 err = -EINVAL;
b84f58b9 1168 goto out;
90d79629 1169 }
b84f58b9 1170
3cfc0f3a 1171 err = -ENOMEM;
b9a5bb58
DL
1172 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1173 if (!nlmsg)
1174 goto out;
1175
06f976ca 1176 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1177 if (!answer)
1178 goto out;
1179
ebc73a67 1180 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1181 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1182
1183 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1184 if (!ifi)
1185 goto out;
06f976ca
SZ
1186 ifi->ifi_family = AF_UNSPEC;
1187 ifi->ifi_index = ifindex;
b84f58b9
DL
1188
1189 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1190 goto out;
b9a5bb58 1191
3cfc0f3a 1192 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1193out:
1194 netlink_close(&nlh);
1195 nlmsg_free(answer);
1196 nlmsg_free(nlmsg);
1197 return err;
1198}
1199
b84f58b9
DL
1200int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1201{
1202 int len, index;
1203
1204 len = strlen(oldname);
dae3fdf6 1205 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1206 return -EINVAL;
1207
1208 index = if_nametoindex(oldname);
1209 if (!index)
1210 return -EINVAL;
1211
1212 return lxc_netdev_rename_by_index(index, newname);
1213}
1214
8befa924 1215int netdev_set_flag(const char *name, int flag)
0ad19a3f 1216{
ebc73a67 1217 int err, index, len;
06f976ca 1218 struct ifinfomsg *ifi;
ebc73a67
CB
1219 struct nl_handler nlh;
1220 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1221
3cfc0f3a
MN
1222 err = netlink_open(&nlh, NETLINK_ROUTE);
1223 if (err)
1224 return err;
0ad19a3f 1225
3cfc0f3a 1226 err = -EINVAL;
0ad19a3f 1227 len = strlen(name);
dae3fdf6 1228 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1229 goto out;
1230
3cfc0f3a 1231 err = -ENOMEM;
0ad19a3f 1232 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1233 if (!nlmsg)
1234 goto out;
1235
06f976ca 1236 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1237 if (!answer)
1238 goto out;
1239
3cfc0f3a 1240 err = -EINVAL;
0ad19a3f 1241 index = if_nametoindex(name);
1242 if (!index)
1243 goto out;
1244
ebc73a67 1245 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1246 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1247
1248 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1249 if (!ifi) {
1250 err = -ENOMEM;
1251 goto out;
1252 }
06f976ca
SZ
1253 ifi->ifi_family = AF_UNSPEC;
1254 ifi->ifi_index = index;
1255 ifi->ifi_change |= IFF_UP;
1256 ifi->ifi_flags |= flag;
0ad19a3f 1257
1258 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1259out:
1260 netlink_close(&nlh);
1261 nlmsg_free(nlmsg);
1262 nlmsg_free(answer);
1263 return err;
1264}
1265
ebc73a67 1266int netdev_get_flag(const char *name, int *flag)
efa1cf45 1267{
ebc73a67 1268 int err, index, len;
a4318300 1269 struct ifinfomsg *ifi;
ebc73a67
CB
1270 struct nl_handler nlh;
1271 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1272
1273 if (!name)
1274 return -EINVAL;
1275
1276 err = netlink_open(&nlh, NETLINK_ROUTE);
1277 if (err)
1278 return err;
1279
1280 err = -EINVAL;
1281 len = strlen(name);
1282 if (len == 1 || len >= IFNAMSIZ)
1283 goto out;
1284
1285 err = -ENOMEM;
1286 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1287 if (!nlmsg)
1288 goto out;
1289
06f976ca 1290 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1291 if (!answer)
1292 goto out;
1293
1294 err = -EINVAL;
1295 index = if_nametoindex(name);
1296 if (!index)
1297 goto out;
1298
06f976ca
SZ
1299 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1300 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1301
1302 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1303 if (!ifi) {
1304 err = -ENOMEM;
1305 goto out;
1306 }
06f976ca
SZ
1307 ifi->ifi_family = AF_UNSPEC;
1308 ifi->ifi_index = index;
efa1cf45
DY
1309
1310 err = netlink_transaction(&nlh, nlmsg, answer);
1311 if (err)
1312 goto out;
1313
06f976ca 1314 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1315
1316 *flag = ifi->ifi_flags;
1317out:
1318 netlink_close(&nlh);
1319 nlmsg_free(nlmsg);
1320 nlmsg_free(answer);
1321 return err;
1322}
1323
1324/*
1325 * \brief Check a interface is up or not.
1326 *
1327 * \param name: name for the interface.
1328 *
1329 * \return int.
1330 * 0 means interface is down.
1331 * 1 means interface is up.
1332 * Others means error happened, and ret-value is the error number.
1333 */
ebc73a67 1334int lxc_netdev_isup(const char *name)
efa1cf45 1335{
ebc73a67 1336 int err, flag;
efa1cf45
DY
1337
1338 err = netdev_get_flag(name, &flag);
1339 if (err)
ebc73a67
CB
1340 return err;
1341
efa1cf45
DY
1342 if (flag & IFF_UP)
1343 return 1;
ebc73a67 1344
efa1cf45 1345 return 0;
efa1cf45
DY
1346}
1347
0130df54
SH
1348int netdev_get_mtu(int ifindex)
1349{
ebc73a67 1350 int answer_len, err, res;
0130df54 1351 struct nl_handler nlh;
06f976ca 1352 struct ifinfomsg *ifi;
0130df54 1353 struct nlmsghdr *msg;
ebc73a67
CB
1354 int readmore = 0, recv_len = 0;
1355 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1356
1357 err = netlink_open(&nlh, NETLINK_ROUTE);
1358 if (err)
1359 return err;
1360
1361 err = -ENOMEM;
1362 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1363 if (!nlmsg)
1364 goto out;
1365
06f976ca 1366 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1367 if (!answer)
1368 goto out;
1369
1370 /* Save the answer buffer length, since it will be overwritten
1371 * on the first receive (and we might need to receive more than
ebc73a67
CB
1372 * once.
1373 */
06f976ca
SZ
1374 answer_len = answer->nlmsghdr->nlmsg_len;
1375
ebc73a67 1376 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1377 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1378
06f976ca 1379 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1380 if (!ifi)
1381 goto out;
06f976ca 1382 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1383
1384 /* Send the request for addresses, which returns all addresses
1385 * on all interfaces. */
1386 err = netlink_send(&nlh, nlmsg);
1387 if (err < 0)
1388 goto out;
1389
6ce39620
CB
1390#pragma GCC diagnostic push
1391#pragma GCC diagnostic ignored "-Wcast-align"
1392
0130df54
SH
1393 do {
1394 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1395 * overwritten by a previous receive.
1396 */
06f976ca 1397 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1398
1399 /* Get the (next) batch of reply messages */
1400 err = netlink_rcv(&nlh, answer);
1401 if (err < 0)
1402 goto out;
1403
1404 recv_len = err;
0130df54
SH
1405
1406 /* Satisfy the typing for the netlink macros */
06f976ca 1407 msg = answer->nlmsghdr;
0130df54
SH
1408
1409 while (NLMSG_OK(msg, recv_len)) {
1410
1411 /* Stop reading if we see an error message */
1412 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1413 struct nlmsgerr *errmsg =
1414 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1415 err = errmsg->error;
1416 goto out;
1417 }
1418
1419 /* Stop reading if we see a NLMSG_DONE message */
1420 if (msg->nlmsg_type == NLMSG_DONE) {
1421 readmore = 0;
1422 break;
1423 }
1424
06f976ca 1425 ifi = NLMSG_DATA(msg);
0130df54
SH
1426 if (ifi->ifi_index == ifindex) {
1427 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1428 int attr_len =
1429 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1430 res = 0;
ebc73a67
CB
1431 while (RTA_OK(rta, attr_len)) {
1432 /* Found a local address for the
1433 * requested interface, return it.
1434 */
0130df54 1435 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1436 memcpy(&res, RTA_DATA(rta),
1437 sizeof(int));
0130df54
SH
1438 err = res;
1439 goto out;
1440 }
1441 rta = RTA_NEXT(rta, attr_len);
1442 }
0130df54
SH
1443 }
1444
ebc73a67
CB
1445 /* Keep reading more data from the socket if the last
1446 * message had the NLF_F_MULTI flag set.
1447 */
0130df54
SH
1448 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1449
ebc73a67 1450 /* Look at the next message received in this buffer. */
0130df54
SH
1451 msg = NLMSG_NEXT(msg, recv_len);
1452 }
1453 } while (readmore);
1454
6ce39620
CB
1455#pragma GCC diagnostic pop
1456
ebc73a67 1457 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1458 err = -1;
1459
1460out:
1461 netlink_close(&nlh);
1462 nlmsg_free(answer);
1463 nlmsg_free(nlmsg);
1464 return err;
1465}
1466
d472214b 1467int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1468{
ebc73a67 1469 int err, index, len;
06f976ca 1470 struct ifinfomsg *ifi;
ebc73a67
CB
1471 struct nl_handler nlh;
1472 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1473
3cfc0f3a
MN
1474 err = netlink_open(&nlh, NETLINK_ROUTE);
1475 if (err)
1476 return err;
75d09f83 1477
3cfc0f3a 1478 err = -EINVAL;
75d09f83 1479 len = strlen(name);
dae3fdf6 1480 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1481 goto out;
1482
3cfc0f3a 1483 err = -ENOMEM;
75d09f83
DL
1484 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1485 if (!nlmsg)
1486 goto out;
1487
06f976ca 1488 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1489 if (!answer)
1490 goto out;
1491
3cfc0f3a 1492 err = -EINVAL;
75d09f83
DL
1493 index = if_nametoindex(name);
1494 if (!index)
1495 goto out;
1496
ebc73a67 1497 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1498 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1499
1500 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1501 if (!ifi) {
1502 err = -ENOMEM;
1503 goto out;
1504 }
06f976ca
SZ
1505 ifi->ifi_family = AF_UNSPEC;
1506 ifi->ifi_index = index;
75d09f83
DL
1507
1508 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1509 goto out;
1510
1511 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1512out:
1513 netlink_close(&nlh);
1514 nlmsg_free(nlmsg);
1515 nlmsg_free(answer);
1516 return err;
1517}
1518
d472214b 1519int lxc_netdev_up(const char *name)
0ad19a3f 1520{
d472214b 1521 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1522}
1523
/* Take the interface called @name down by passing an empty flag set to
 * netdev_set_flag().
 */
int lxc_netdev_down(const char *name)
{
	const int flags = 0;

	return netdev_set_flag(name, flags);
}
1528
497353b6 1529int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1530{
ebc73a67 1531 int err, len;
06f976ca 1532 struct ifinfomsg *ifi;
ebc73a67 1533 struct nl_handler nlh;
0ad19a3f 1534 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1535 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1536
3cfc0f3a
MN
1537 err = netlink_open(&nlh, NETLINK_ROUTE);
1538 if (err)
1539 return err;
0ad19a3f 1540
3cfc0f3a 1541 err = -EINVAL;
0ad19a3f 1542 len = strlen(name1);
dae3fdf6 1543 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1544 goto out;
1545
1546 len = strlen(name2);
dae3fdf6 1547 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1548 goto out;
1549
3cfc0f3a 1550 err = -ENOMEM;
0ad19a3f 1551 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1552 if (!nlmsg)
1553 goto out;
1554
06f976ca 1555 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1556 if (!answer)
1557 goto out;
1558
06f976ca 1559 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1560 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1561 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1562
1563 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1564 if (!ifi)
1565 goto out;
06f976ca 1566 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1567
3cfc0f3a 1568 err = -EINVAL;
79e68309 1569 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1570 if (!nest1)
1571 goto out;
1572
1573 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1574 goto out;
1575
1576 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1577 if (!nest2)
1578 goto out;
1579
1580 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1581 if (!nest3)
1582 goto out;
1583
06f976ca 1584 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1585 if (!ifi) {
1586 err = -ENOMEM;
06f976ca 1587 goto out;
25a9939b 1588 }
0ad19a3f 1589
1590 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1591 goto out;
1592
1593 nla_end_nested(nlmsg, nest3);
0ad19a3f 1594 nla_end_nested(nlmsg, nest2);
0ad19a3f 1595 nla_end_nested(nlmsg, nest1);
1596
1597 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1598 goto out;
1599
3cfc0f3a 1600 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1601out:
1602 netlink_close(&nlh);
1603 nlmsg_free(answer);
1604 nlmsg_free(nlmsg);
1605 return err;
1606}
1607
ebc73a67 1608/* TODO: merge with lxc_macvlan_create */
7c11d57a 1609int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1610{
ebc73a67 1611 int err, len, lindex;
06f976ca 1612 struct ifinfomsg *ifi;
ebc73a67 1613 struct nl_handler nlh;
26c39028 1614 struct rtattr *nest, *nest2;
ebc73a67 1615 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1616
3cfc0f3a
MN
1617 err = netlink_open(&nlh, NETLINK_ROUTE);
1618 if (err)
1619 return err;
26c39028 1620
3cfc0f3a 1621 err = -EINVAL;
26c39028 1622 len = strlen(master);
dae3fdf6 1623 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1624 goto err3;
1625
1626 len = strlen(name);
dae3fdf6 1627 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1628 goto err3;
1629
3cfc0f3a 1630 err = -ENOMEM;
26c39028
JHS
1631 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1632 if (!nlmsg)
1633 goto err3;
1634
06f976ca 1635 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1636 if (!answer)
1637 goto err2;
1638
3cfc0f3a 1639 err = -EINVAL;
26c39028
JHS
1640 lindex = if_nametoindex(master);
1641 if (!lindex)
1642 goto err1;
1643
06f976ca 1644 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1645 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1646 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1647
1648 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1649 if (!ifi) {
1650 err = -ENOMEM;
1651 goto err1;
1652 }
06f976ca 1653 ifi->ifi_family = AF_UNSPEC;
26c39028 1654
79e68309 1655 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1656 if (!nest)
1657 goto err1;
1658
1659 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1660 goto err1;
1661
1662 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1663 if (!nest2)
1664 goto err1;
e892973e 1665
26c39028
JHS
1666 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1667 goto err1;
e892973e 1668
26c39028 1669 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1670 nla_end_nested(nlmsg, nest);
1671
1672 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1673 goto err1;
1674
1675 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1676 goto err1;
1677
3cfc0f3a 1678 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1679err1:
1680 nlmsg_free(answer);
1681err2:
1682 nlmsg_free(nlmsg);
1683err3:
1684 netlink_close(&nlh);
1685 return err;
1686}
1687
e892973e 1688int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1689{
ebc73a67 1690 int err, index, len;
06f976ca 1691 struct ifinfomsg *ifi;
ebc73a67 1692 struct nl_handler nlh;
e892973e 1693 struct rtattr *nest, *nest2;
ebc73a67 1694 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1695
3cfc0f3a
MN
1696 err = netlink_open(&nlh, NETLINK_ROUTE);
1697 if (err)
1698 return err;
0ad19a3f 1699
3cfc0f3a 1700 err = -EINVAL;
0ad19a3f 1701 len = strlen(master);
dae3fdf6 1702 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1703 goto out;
1704
1705 len = strlen(name);
dae3fdf6 1706 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1707 goto out;
1708
3cfc0f3a 1709 err = -ENOMEM;
0ad19a3f 1710 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1711 if (!nlmsg)
1712 goto out;
1713
06f976ca 1714 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1715 if (!answer)
1716 goto out;
1717
3cfc0f3a 1718 err = -EINVAL;
0ad19a3f 1719 index = if_nametoindex(master);
1720 if (!index)
1721 goto out;
1722
06f976ca 1723 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1724 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1725 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1726
1727 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1728 if (!ifi) {
1729 err = -ENOMEM;
1730 goto out;
1731 }
06f976ca 1732 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1733
79e68309 1734 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1735 if (!nest)
1736 goto out;
1737
1738 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1739 goto out;
1740
e892973e
DL
1741 if (mode) {
1742 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1743 if (!nest2)
1744 goto out;
1745
1746 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1747 goto out;
1748
1749 nla_end_nested(nlmsg, nest2);
1750 }
1751
0ad19a3f 1752 nla_end_nested(nlmsg, nest);
1753
1754 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1755 goto out;
1756
1757 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1758 goto out;
1759
3cfc0f3a 1760 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1761out:
1762 netlink_close(&nlh);
1763 nlmsg_free(answer);
1764 nlmsg_free(nlmsg);
1765 return err;
1766}
1767
/*
 * Write the string @value to the file at @path.
 *
 * Returns 0 on success or -errno when the file cannot be opened or the write
 * fails.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() has a chance to overwrite it. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);
	return ret;
}
1783
6509154d 1784static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1785{
1786 int ret;
1787 char path[PATH_MAX];
1788 char buf[1] = "";
1789
1790 if (family != AF_INET && family != AF_INET6)
1791 return minus_one_set_errno(EINVAL);
1792
1793 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1794 family == AF_INET ? "ipv4" : "ipv6", ifname,
1795 "forwarding");
1796 if (ret < 0 || (size_t)ret >= PATH_MAX)
1797 return minus_one_set_errno(E2BIG);
1798
1799 return lxc_read_file_expect(path, buf, 1, "1");
1800}
1801
0ad19a3f 1802static int neigh_proxy_set(const char *ifname, int family, int flag)
1803{
9ba8130c 1804 int ret;
419590da 1805 char path[PATH_MAX];
0ad19a3f 1806
1807 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1808 return -EINVAL;
0ad19a3f 1809
419590da 1810 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1811 family == AF_INET ? "ipv4" : "ipv6", ifname,
1812 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 1813 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 1814 return -E2BIG;
0ad19a3f 1815
ebc73a67 1816 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1817}
1818
6509154d 1819static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1820{
1821 int ret;
1822 char path[PATH_MAX];
1823 char buf[1] = "";
1824
1825 if (family != AF_INET && family != AF_INET6)
1826 return minus_one_set_errno(EINVAL);
1827
1828 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1829 family == AF_INET ? "ipv4" : "ipv6", ifname,
1830 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1831 if (ret < 0 || (size_t)ret >= PATH_MAX)
1832 return minus_one_set_errno(E2BIG);
1833
1834 return lxc_read_file_expect(path, buf, 1, "1");
1835}
1836
/* Enable neighbour proxying on interface @name for @family. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1841
/* Disable neighbour proxying on interface @name for @family. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
1846
/* Decode one hexadecimal digit. Returns its value (0-15) or -1 if @c is not
 * a hexadecimal digit.
 */
static int mac_hex_nibble(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char;
	 * passing a negative plain char is undefined behaviour.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * @sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are
 * written to @sockaddr->sa_data. Each byte is written as one or two
 * hexadecimal digits; bytes may be separated by ':'. Parsing stops at the end
 * of the string or after ETH_ALEN bytes, so shorter addresses are accepted
 * and only fill the leading bytes.
 *
 * Returns 0 on success or -EINVAL when an unexpected character is met.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int hi, lo;

		/* The first character of a byte must be a hex digit. */
		hi = mac_hex_nibble(*macaddr++);
		if (hi < 0)
			return -EINVAL;
		val = (unsigned int)hi;

		/* The second one is either a hex digit, or a ':' / end of
		 * string that terminates a single-digit byte.
		 */
		lo = mac_hex_nibble(*macaddr);
		if (lo >= 0)
			val = (val << 4) | (unsigned int)lo;
		else if (*macaddr != ':' && *macaddr != '\0')
			return -EINVAL;

		if (*macaddr != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1891
ebc73a67
CB
1892static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1893 void *acast, int prefix)
0ad19a3f 1894{
ebc73a67 1895 int addrlen, err;
06f976ca 1896 struct ifaddrmsg *ifa;
ebc73a67
CB
1897 struct nl_handler nlh;
1898 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1899
ebc73a67
CB
1900 addrlen = family == AF_INET ? sizeof(struct in_addr)
1901 : sizeof(struct in6_addr);
4bf1968d 1902
3cfc0f3a
MN
1903 err = netlink_open(&nlh, NETLINK_ROUTE);
1904 if (err)
1905 return err;
0ad19a3f 1906
3cfc0f3a 1907 err = -ENOMEM;
0ad19a3f 1908 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1909 if (!nlmsg)
1910 goto out;
1911
06f976ca 1912 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1913 if (!answer)
1914 goto out;
1915
06f976ca 1916 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1917 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1918 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1919
1920 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1921 if (!ifa)
25a9939b 1922 goto out;
06f976ca
SZ
1923 ifa->ifa_prefixlen = prefix;
1924 ifa->ifa_index = ifindex;
1925 ifa->ifa_family = family;
1926 ifa->ifa_scope = 0;
acf47e1b 1927
3cfc0f3a 1928 err = -EINVAL;
4bf1968d 1929 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1930 goto out;
1931
4bf1968d 1932 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1933 goto out;
1934
d8948a52 1935 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1936 goto out;
1937
ebc73a67 1938 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1939 err = -EPROTONOSUPPORT;
79881dc6
DL
1940 if (family == AF_INET6 &&
1941 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1942 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1943 goto out;
0ad19a3f 1944
3cfc0f3a 1945 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1946out:
1947 netlink_close(&nlh);
1948 nlmsg_free(answer);
1949 nlmsg_free(nlmsg);
1950 return err;
1951}
1952
/* Add the IPv6 address @addr/@prefix to the link with index @ifindex; @mcast
 * and @acast are forwarded to ip_addr_add() as its bcast and acast arguments.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
1959
/* Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * link with index @ifindex. No anycast address is passed on.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
1965
/* Extract an IFA_LOCAL address (or IFA_ADDRESS when no IFA_LOCAL is present)
 * from the given RTM_NEWADDR message. If *res is NULL, memory for the address
 * is allocated and stored in *res (so res should be an in_addr** or
 * in6_addr**); an existing *res buffer is overwritten in place.
 *
 * Returns 0 when the message was processed (including when its family does
 * not match and nothing was stored) and -1 on an address length mismatch or
 * allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addrmsg = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining, want;

	if (addrmsg->ifa_family != family)
		return 0;

	if (family == AF_INET)
		want = sizeof(struct in_addr);
	else
		want = sizeof(struct in6_addr);

	remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));

	/* Walk the rtattr's attached to this message. */
	for (attr = IFA_RTA(addrmsg); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the right
		 * length; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != want)
			return -1;

		/* An IFA_ADDRESS stored earlier is simply overwritten. */
		if (!*res) {
			*res = malloc(want);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), want);

		/* IFA_LOCAL is the preferred result, so stop looking. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2019
19a26f82
MK
2020static int ip_addr_get(int family, int ifindex, void **res)
2021{
ebc73a67 2022 int answer_len, err;
06f976ca 2023 struct ifaddrmsg *ifa;
ebc73a67 2024 struct nl_handler nlh;
19a26f82 2025 struct nlmsghdr *msg;
ebc73a67
CB
2026 int readmore = 0, recv_len = 0;
2027 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2028
2029 err = netlink_open(&nlh, NETLINK_ROUTE);
2030 if (err)
2031 return err;
2032
2033 err = -ENOMEM;
2034 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2035 if (!nlmsg)
2036 goto out;
2037
06f976ca 2038 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2039 if (!answer)
2040 goto out;
2041
ebc73a67
CB
2042 /* Save the answer buffer length, since it will be overwritten on the
2043 * first receive (and we might need to receive more than once).
2044 */
06f976ca
SZ
2045 answer_len = answer->nlmsghdr->nlmsg_len;
2046
ebc73a67 2047 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2048 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2049
06f976ca 2050 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2051 if (!ifa)
2052 goto out;
06f976ca 2053 ifa->ifa_family = family;
19a26f82 2054
ebc73a67
CB
2055 /* Send the request for addresses, which returns all addresses on all
2056 * interfaces.
2057 */
19a26f82
MK
2058 err = netlink_send(&nlh, nlmsg);
2059 if (err < 0)
2060 goto out;
19a26f82 2061
6ce39620
CB
2062#pragma GCC diagnostic push
2063#pragma GCC diagnostic ignored "-Wcast-align"
2064
19a26f82
MK
2065 do {
2066 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2067 * overwritten by a previous receive.
2068 */
06f976ca 2069 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2070
ebc73a67 2071 /* Get the (next) batch of reply messages. */
19a26f82
MK
2072 err = netlink_rcv(&nlh, answer);
2073 if (err < 0)
2074 goto out;
2075
2076 recv_len = err;
2077 err = 0;
2078
ebc73a67 2079 /* Satisfy the typing for the netlink macros. */
06f976ca 2080 msg = answer->nlmsghdr;
19a26f82
MK
2081
2082 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2083 /* Stop reading if we see an error message. */
19a26f82 2084 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2085 struct nlmsgerr *errmsg =
2086 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2087 err = errmsg->error;
2088 goto out;
2089 }
2090
ebc73a67 2091 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2092 if (msg->nlmsg_type == NLMSG_DONE) {
2093 readmore = 0;
2094 break;
2095 }
2096
2097 if (msg->nlmsg_type != RTM_NEWADDR) {
2098 err = -1;
2099 goto out;
2100 }
2101
06f976ca
SZ
2102 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2103 if (ifa->ifa_index == ifindex) {
2104 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2105 err = -1;
2106 goto out;
2107 }
2108
ebc73a67 2109 /* Found a result, stop searching. */
19a26f82
MK
2110 if (*res)
2111 goto out;
2112 }
2113
ebc73a67
CB
2114 /* Keep reading more data from the socket if the last
2115 * message had the NLF_F_MULTI flag set.
2116 */
19a26f82
MK
2117 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2118
ebc73a67 2119 /* Look at the next message received in this buffer. */
19a26f82
MK
2120 msg = NLMSG_NEXT(msg, recv_len);
2121 }
2122 } while (readmore);
2123
6ce39620
CB
2124#pragma GCC diagnostic pop
2125
19a26f82 2126 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2127 * error.
2128 */
19a26f82
MK
2129 err = -1;
2130
2131out:
2132 netlink_close(&nlh);
2133 nlmsg_free(answer);
2134 nlmsg_free(nlmsg);
2135 return err;
2136}
2137
/* Look up an IPv6 address of the link with index @ifindex; see ip_addr_get()
 * for how *@res is filled in and which values are returned.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2142
/* Look up an IPv4 address of the link with index @ifindex; see ip_addr_get()
 * for how *@res is filled in and which values are returned.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2147
f8fee0e2
MK
2148static int ip_gateway_add(int family, int ifindex, void *gw)
2149{
ebc73a67 2150 int addrlen, err;
f8fee0e2 2151 struct nl_handler nlh;
06f976ca 2152 struct rtmsg *rt;
ebc73a67 2153 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2154
ebc73a67
CB
2155 addrlen = family == AF_INET ? sizeof(struct in_addr)
2156 : sizeof(struct in6_addr);
f8fee0e2
MK
2157
2158 err = netlink_open(&nlh, NETLINK_ROUTE);
2159 if (err)
2160 return err;
2161
2162 err = -ENOMEM;
2163 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2164 if (!nlmsg)
2165 goto out;
2166
06f976ca 2167 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2168 if (!answer)
2169 goto out;
2170
06f976ca 2171 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2172 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2173 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2174
2175 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2176 if (!rt)
2177 goto out;
06f976ca
SZ
2178 rt->rtm_family = family;
2179 rt->rtm_table = RT_TABLE_MAIN;
2180 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2181 rt->rtm_protocol = RTPROT_BOOT;
2182 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2183 /* "default" destination */
06f976ca 2184 rt->rtm_dst_len = 0;
f8fee0e2
MK
2185
2186 err = -EINVAL;
a2f9a670 2187
2188 /* If gateway address not supplied, then a device route will be created instead */
2189 if (gw != NULL) {
2190 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2191 goto out;
2192 }
f8fee0e2
MK
2193
2194 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2195 * addresses for the gateway.
2196 */
f8fee0e2
MK
2197 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2198 goto out;
2199
2200 err = netlink_transaction(&nlh, nlmsg, answer);
2201out:
2202 netlink_close(&nlh);
2203 nlmsg_free(answer);
2204 nlmsg_free(nlmsg);
2205 return err;
2206}
2207
/* Add an IPv4 default route via @gw on the link with index @ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2212
/* Add an IPv6 default route via @gw on the link with index @ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/*
 * Decide whether @bridge is to be treated as an openvswitch bridge.
 *
 * The answer is true exactly when stat() on /sys/class/net/<bridge>/bridge
 * fails with ENOENT. A name that does not fit the path buffer, a successful
 * stat() or any other stat() error yields false.
 */
bool is_ovs_bridge(const char *bridge)
{
	struct stat st;
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char sysfs_path[22 + IFNAMSIZ + 1] = {0};
	int len;

	len = snprintf(sysfs_path, sizeof(sysfs_path),
		       "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_path))
		return false;

	return stat(sysfs_path, &st) < 0 && errno == ENOENT;
}
2234
/* Arguments handed to the ovs-vsctl exec callbacks through run_command(). */
struct ovs_veth_args {
	/* openvswitch bridge the port is added to or removed from */
	const char *bridge;
	/* name of the interface acting as the port */
	const char *nic;
};
2239
cb0dc11b
CB
2240/* Called from a background thread - when nic goes away, remove it from the
2241 * bridge.
c43cbc04 2242 */
581c75e7 2243static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2244{
581c75e7 2245 struct ovs_veth_args *args = data;
cb0dc11b 2246
581c75e7
CB
2247 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2248 (char *)NULL);
2249 return -1;
c43cbc04
SH
2250}
2251
581c75e7 2252int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2253{
c43cbc04 2254 int ret;
419590da 2255 char cmd_output[PATH_MAX];
581c75e7 2256 struct ovs_veth_args args;
6ad22d06 2257
581c75e7
CB
2258 args.bridge = bridge;
2259 args.nic = nic;
2260 ret = run_command(cmd_output, sizeof(cmd_output),
2261 lxc_ovs_delete_port_exec, (void *)&args);
2262 if (ret < 0) {
2263 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2264 "%s", bridge, nic, cmd_output);
6ad22d06 2265 return -1;
581c75e7 2266 }
0d204771 2267
581c75e7
CB
2268 return 0;
2269}
ebc73a67 2270
581c75e7
CB
2271static int lxc_ovs_attach_bridge_exec(void *data)
2272{
2273 struct ovs_veth_args *args = data;
ebc73a67 2274
581c75e7
CB
2275 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2276 (char *)NULL);
2277 return -1;
2278}
ebc73a67 2279
581c75e7
CB
2280static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2281{
2282 int ret;
419590da 2283 char cmd_output[PATH_MAX];
581c75e7 2284 struct ovs_veth_args args;
ebc73a67 2285
581c75e7
CB
2286 args.bridge = bridge;
2287 args.nic = nic;
2288 ret = run_command(cmd_output, sizeof(cmd_output),
2289 lxc_ovs_attach_bridge_exec, (void *)&args);
2290 if (ret < 0) {
2291 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2292 bridge, nic, cmd_output);
2293 return -1;
c43cbc04 2294 }
0d204771 2295
581c75e7 2296 return 0;
0d204771 2297}
0d204771 2298
581c75e7 2299int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2300{
ebc73a67 2301 int err, fd, index;
9de31d5a 2302 size_t retlen;
0ad19a3f 2303 struct ifreq ifr;
2304
dae3fdf6 2305 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2306 return -EINVAL;
0ad19a3f 2307
2308 index = if_nametoindex(ifname);
2309 if (!index)
3cfc0f3a 2310 return -EINVAL;
0ad19a3f 2311
0d204771 2312 if (is_ovs_bridge(bridge))
581c75e7 2313 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2314
ad9429e5 2315 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2316 if (fd < 0)
3cfc0f3a 2317 return -errno;
0ad19a3f 2318
9de31d5a 2319 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2320 if (retlen >= IFNAMSIZ) {
2321 close(fd);
9de31d5a 2322 return -E2BIG;
42cc4083 2323 }
9de31d5a 2324
ebc73a67 2325 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2326 ifr.ifr_ifindex = index;
7d163508 2327 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2328 close(fd);
3cfc0f3a
MN
2329 if (err)
2330 err = -errno;
0ad19a3f 2331
2332 return err;
2333}
72d0e1cb 2334
ebc73a67 2335static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2336 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2337 [LXC_NET_VETH] = "veth",
2338 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2339 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2340 [LXC_NET_PHYS] = "phys",
b343592b
BP
2341 [LXC_NET_VLAN] = "vlan",
2342 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2343};
2344
2345const char *lxc_net_type_to_str(int type)
2346{
2347 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2348 return NULL;
ebc73a67 2349
72d0e1cb
SG
2350 return lxc_network_types[type];
2351}
8befa924 2352
ebc73a67 2353static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2354
966e9f1f 2355char *lxc_mkifname(char *template)
a0265685 2356{
2d7bf744 2357 int ret;
b1e44ed1 2358 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2359 char name[IFNAMSIZ];
2360 bool exists = false;
2361 size_t i = 0;
280cc35f 2362#ifdef HAVE_RAND_R
2363 unsigned int seed;
2364
2365 seed = randseed(false);
2366#else
2367
2368 (void)randseed(true);
2369#endif
a0265685 2370
535e8859
CB
2371 if (strlen(template) >= IFNAMSIZ)
2372 return NULL;
2373
ebc73a67 2374 /* Get all the network interfaces. */
b1e44ed1 2375 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2376 if (ret < 0) {
6d1400b5 2377 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2378 return NULL;
2379 }
a0265685 2380
ebc73a67 2381 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2382 for (;;) {
966e9f1f 2383 name[0] = '\0';
94b1cade 2384 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2385
966e9f1f 2386 exists = false;
280cc35f 2387
a0265685
SG
2388 for (i = 0; i < strlen(name); i++) {
2389 if (name[i] == 'X') {
2390#ifdef HAVE_RAND_R
8523344a 2391 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2392#else
8523344a 2393 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2394#endif
2395 }
2396 }
2397
2398 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2399 if (!strcmp(ifa->ifa_name, name)) {
2400 exists = true;
a0265685
SG
2401 break;
2402 }
2403 }
2404
966e9f1f 2405 if (!exists)
a0265685 2406 break;
a0265685
SG
2407 }
2408
b1e44ed1 2409 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2410 (void)strlcpy(template, name, strlen(template) + 1);
2411
2412 return template;
a0265685
SG
2413}
2414
8befa924
SH
2415int setup_private_host_hw_addr(char *veth1)
2416{
ebc73a67 2417 int err, sockfd;
8befa924 2418 struct ifreq ifr;
8befa924 2419
ad9429e5 2420 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2421 if (sockfd < 0)
2422 return -errno;
2423
ebc73a67 2424 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
87c6e5db
DJ
2425 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2426 close(sockfd);
ebc73a67 2427 return -E2BIG;
87c6e5db 2428 }
ebc73a67 2429
8befa924
SH
2430 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2431 if (err < 0) {
8befa924 2432 close(sockfd);
8befa924
SH
2433 return -errno;
2434 }
2435
2436 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2437 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 2438 close(sockfd);
8befa924
SH
2439 if (err < 0)
2440 return -errno;
2441
2442 return 0;
2443}
811ef482
CB
2444
2445int lxc_find_gateway_addresses(struct lxc_handler *handler)
2446{
2447 struct lxc_list *network = &handler->conf->network;
2448 struct lxc_list *iterator;
2449 struct lxc_netdev *netdev;
2450 int link_index;
2451
2452 lxc_list_for_each(iterator, network) {
2453 netdev = iterator->elem;
2454
2455 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2456 continue;
2457
2458 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2459 ERROR("Automatic gateway detection is only supported "
2460 "for veth and macvlan");
2461 return -1;
2462 }
2463
de4855a8 2464 if (netdev->link[0] == '\0') {
811ef482
CB
2465 ERROR("Automatic gateway detection needs a link interface");
2466 return -1;
2467 }
2468
2469 link_index = if_nametoindex(netdev->link);
2470 if (!link_index)
2471 return -EINVAL;
2472
2473 if (netdev->ipv4_gateway_auto) {
2474 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2475 ERROR("Failed to automatically find ipv4 gateway "
2476 "address from link interface \"%s\"", netdev->link);
2477 return -1;
2478 }
2479 }
2480
2481 if (netdev->ipv6_gateway_auto) {
2482 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2483 ERROR("Failed to automatically find ipv6 gateway "
2484 "address from link interface \"%s\"", netdev->link);
2485 return -1;
2486 }
2487 }
2488 }
2489
2490 return 0;
2491}
2492
2493#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2494static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2495 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2496{
2497 int ret;
2498 pid_t child;
2499 int bytes, pipefd[2];
2500 char *token, *saveptr = NULL;
095ead80 2501 char netdev_link[IFNAMSIZ];
419590da 2502 char buffer[PATH_MAX] = {0};
94b1cade 2503 size_t retlen;
811ef482
CB
2504
2505 if (netdev->type != LXC_NET_VETH) {
2506 ERROR("Network type %d not support for unprivileged use", netdev->type);
2507 return -1;
2508 }
2509
2510 ret = pipe(pipefd);
2511 if (ret < 0) {
2512 SYSERROR("Failed to create pipe");
2513 return -1;
2514 }
2515
2516 child = fork();
2517 if (child < 0) {
2518 SYSERROR("Failed to create new process");
2519 close(pipefd[0]);
2520 close(pipefd[1]);
2521 return -1;
2522 }
2523
2524 if (child == 0) {
8335fd40 2525 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2526
2527 close(pipefd[0]);
2528
2529 ret = dup2(pipefd[1], STDOUT_FILENO);
2530 if (ret >= 0)
2531 ret = dup2(pipefd[1], STDERR_FILENO);
2532 close(pipefd[1]);
2533 if (ret < 0) {
2534 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2535 _exit(EXIT_FAILURE);
811ef482
CB
2536 }
2537
de4855a8 2538 if (netdev->link[0] != '\0')
9de31d5a 2539 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2540 else
9de31d5a
CB
2541 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2542 if (retlen >= IFNAMSIZ) {
2543 SYSERROR("Invalid network device name");
2544 _exit(EXIT_FAILURE);
2545 }
811ef482 2546
8335fd40
CB
2547 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2548 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2549 _exit(EXIT_FAILURE);
8335fd40 2550 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2551
2552 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2553 lxcname, pidstr, netdev_link,
de4855a8
CB
2554 netdev->name[0] != '\0' ? netdev->name : "(null)");
2555 if (netdev->name[0] != '\0')
811ef482
CB
2556 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2557 lxcpath, lxcname, pidstr, "veth", netdev_link,
2558 netdev->name, (char *)NULL);
2559 else
2560 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2561 lxcpath, lxcname, pidstr, "veth", netdev_link,
2562 (char *)NULL);
2563 SYSERROR("Failed to execute lxc-user-nic");
78070056 2564 _exit(EXIT_FAILURE);
811ef482
CB
2565 }
2566
2567 /* close the write-end of the pipe */
2568 close(pipefd[1]);
2569
419590da 2570 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2571 if (bytes < 0) {
74c6e2b0 2572 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2573 close(pipefd[0]);
6b9f82a9
CB
2574 } else {
2575 buffer[bytes - 1] = '\0';
811ef482 2576 }
811ef482
CB
2577
2578 ret = wait_for_pid(child);
2579 close(pipefd[0]);
6b9f82a9 2580 if (ret != 0 || bytes < 0) {
811ef482
CB
2581 ERROR("lxc-user-nic failed to configure requested network: %s",
2582 buffer[0] != '\0' ? buffer : "(null)");
2583 return -1;
2584 }
2585 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2586
2587 /* netdev->name */
2588 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2589 if (!token) {
2590 ERROR("Failed to parse lxc-user-nic output");
811ef482 2591 return -1;
74c6e2b0 2592 }
811ef482 2593
e389f2af
CB
2594 /*
2595 * lxc-user-nic will take care of proper network device naming. So
2596 * netdev->name and netdev->created_name need to be identical to not
2597 * trigger another rename later on.
2598 */
2599 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2600 if (retlen < IFNAMSIZ)
2601 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2602 if (retlen >= IFNAMSIZ) {
2603 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2604 return -E2BIG;
2605 }
811ef482 2606
74c6e2b0 2607 /* netdev->ifindex */
811ef482 2608 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2609 if (!token) {
2610 ERROR("Failed to parse lxc-user-nic output");
811ef482 2611 return -1;
74c6e2b0 2612 }
811ef482 2613
74c6e2b0
CB
2614 ret = lxc_safe_int(token, &netdev->ifindex);
2615 if (ret < 0) {
6d1400b5 2616 errno = -ret;
2617 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2618 return -1;
2619 }
2620
74c6e2b0 2621 /* netdev->priv.veth_attr.veth1 */
811ef482 2622 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2623 if (!token) {
2624 ERROR("Failed to parse lxc-user-nic output");
811ef482 2625 return -1;
74c6e2b0 2626 }
811ef482 2627
94b1cade
DJ
2628 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2629 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2630 ERROR("Host side veth device name returned by lxc-user-nic is "
2631 "too long");
2632 return -E2BIG;
2633 }
74c6e2b0
CB
2634
2635 /* netdev->priv.veth_attr.ifindex */
2636 token = strtok_r(NULL, ":", &saveptr);
2637 if (!token) {
2638 ERROR("Failed to parse lxc-user-nic output");
2639 return -1;
2640 }
2641
2642 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2643 if (ret < 0) {
6d1400b5 2644 errno = -ret;
2645 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2646 return -1;
2647 }
2648
4d781681 2649 if (netdev->upscript) {
2650 char *argv[] = {
2651 "veth",
2652 netdev->link,
2653 netdev->priv.veth_attr.veth1,
2654 NULL,
2655 };
2656
e389f2af
CB
2657 ret = run_script_argv(lxcname, hooks_version, "net",
2658 netdev->upscript, "up", argv);
4d781681 2659 if (ret < 0)
2660 return -1;
2661 }
2662
811ef482
CB
2663 return 0;
2664}
2665
f0ecc19d 2666static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2667 struct lxc_netdev *netdev,
2668 const char *netns_path)
811ef482
CB
2669{
2670 int bytes, ret;
2671 pid_t child;
2672 int pipefd[2];
419590da 2673 char buffer[PATH_MAX] = {0};
811ef482
CB
2674
2675 if (netdev->type != LXC_NET_VETH) {
2676 ERROR("Network type %d not support for unprivileged use", netdev->type);
2677 return -1;
2678 }
2679
2680 ret = pipe(pipefd);
2681 if (ret < 0) {
2682 SYSERROR("Failed to create pipe");
2683 return -1;
2684 }
2685
2686 child = fork();
2687 if (child < 0) {
2688 SYSERROR("Failed to create new process");
2689 close(pipefd[0]);
2690 close(pipefd[1]);
2691 return -1;
2692 }
2693
2694 if (child == 0) {
8843fde4 2695 char *hostveth;
811ef482
CB
2696
2697 close(pipefd[0]);
2698
2699 ret = dup2(pipefd[1], STDOUT_FILENO);
2700 if (ret >= 0)
2701 ret = dup2(pipefd[1], STDERR_FILENO);
2702 close(pipefd[1]);
2703 if (ret < 0) {
2704 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2705 _exit(EXIT_FAILURE);
811ef482
CB
2706 }
2707
8843fde4
CB
2708 if (netdev->priv.veth_attr.pair[0] != '\0')
2709 hostveth = netdev->priv.veth_attr.pair;
2710 else
2711 hostveth = netdev->priv.veth_attr.veth1;
2712 if (hostveth[0] == '\0') {
74c6e2b0 2713 SYSERROR("Host side veth device name is missing");
a30b9023 2714 _exit(EXIT_FAILURE);
74c6e2b0
CB
2715 }
2716
de4855a8 2717 if (netdev->link[0] == '\0') {
811ef482 2718 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2719 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2720 _exit(EXIT_FAILURE);
74c6e2b0 2721 }
811ef482 2722
811ef482 2723 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2724 lxcname, netns_path, netdev->link, hostveth);
811ef482 2725 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2726 lxcname, netns_path, "veth", netdev->link, hostveth,
2727 (char *)NULL);
811ef482 2728 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2729 _exit(EXIT_FAILURE);
811ef482
CB
2730 }
2731
2732 close(pipefd[1]);
2733
419590da 2734 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2735 if (bytes < 0) {
2736 SYSERROR("Failed to read from pipe file descriptor.");
2737 close(pipefd[0]);
6b9f82a9
CB
2738 } else {
2739 buffer[bytes - 1] = '\0';
811ef482 2740 }
811ef482 2741
6b9f82a9
CB
2742 ret = wait_for_pid(child);
2743 close(pipefd[0]);
2744 if (ret != 0 || bytes < 0) {
811ef482
CB
2745 ERROR("lxc-user-nic failed to delete requested network: %s",
2746 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2747 return -1;
2748 }
2749
811ef482
CB
2750 return 0;
2751}
2752
1bd8d726
CB
2753bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2754{
2755 int ret;
2756 struct lxc_list *iterator;
2757 struct lxc_list *network = &handler->conf->network;
2758 /* strlen("/proc/") = 6
2759 * +
8335fd40 2760 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2761 * +
2762 * strlen("/fd/") = 4
2763 * +
8335fd40 2764 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2765 * +
2766 * \0
2767 */
8335fd40 2768 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2769
2770 *netns_path = '\0';
2771
28d9e29e 2772 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2773 DEBUG("Cannot not guarantee safe deletion of network devices. "
2774 "Manual cleanup maybe needed");
2775 return false;
2776 }
2777
2778 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2779 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2780 if (ret < 0 || ret >= sizeof(netns_path))
2781 return false;
2782
2783 lxc_list_for_each(iterator, network) {
2784 char *hostveth = NULL;
2785 struct lxc_netdev *netdev = iterator->elem;
2786
2787 /* We can only delete devices whose ifindex we have. If we don't
2788 * have the index it means that we didn't create it.
2789 */
2790 if (!netdev->ifindex)
2791 continue;
2792
2793 if (netdev->type == LXC_NET_PHYS) {
2794 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2795 netdev->link);
2796 if (ret < 0)
2797 WARN("Failed to rename interface with index %d "
2798 "to its initial name \"%s\"",
2799 netdev->ifindex, netdev->link);
2800 else
2801 TRACE("Renamed interface with index %d to its "
2802 "initial name \"%s\"",
2803 netdev->ifindex, netdev->link);
b3259dc6
TP
2804
2805 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2806 goto clear_ifindices;
1bd8d726
CB
2807 }
2808
2809 ret = netdev_deconf[netdev->type](handler, netdev);
2810 if (ret < 0)
2811 WARN("Failed to deconfigure network device");
2812
2813 if (netdev->type != LXC_NET_VETH)
66a7c406 2814 goto clear_ifindices;
1bd8d726 2815
c869be20 2816 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2817 goto clear_ifindices;
1bd8d726 2818
8843fde4
CB
2819 if (netdev->priv.veth_attr.pair[0] != '\0')
2820 hostveth = netdev->priv.veth_attr.pair;
2821 else
2822 hostveth = netdev->priv.veth_attr.veth1;
2823 if (hostveth[0] == '\0')
66a7c406 2824 goto clear_ifindices;
8843fde4 2825
1bd8d726
CB
2826 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2827 handler->name, netdev,
2828 netns_path);
2829 if (ret < 0) {
1bd8d726 2830 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2831 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2832 goto clear_ifindices;
1bd8d726
CB
2833 }
2834 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2835 netdev->link);
66a7c406
CB
2836
2837clear_ifindices:
ad2ddfcd 2838 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
2839 * have cached stale data which would cause it to fail on reboot
2840 * we're we don't re-read the on-disk config file.
2841 */
2842 netdev->ifindex = 0;
2843 if (netdev->type == LXC_NET_PHYS) {
2844 netdev->priv.phys_attr.ifindex = 0;
2845 } else if (netdev->type == LXC_NET_VETH) {
2846 netdev->priv.veth_attr.veth1[0] = '\0';
2847 netdev->priv.veth_attr.ifindex = 0;
2848 }
1bd8d726
CB
2849 }
2850
bb84beda 2851 return true;
1bd8d726
CB
2852}
2853
/* Argument bundle handed through run_command()'s void pointer to the
 * "ip neigh ... proxy" exec callbacks.
 */
struct ip_proxy_args {
	const char *ip;  /* address in text form, placed after "proxy" */
	const char *dev; /* interface name, placed after "dev" */
};
2858
2859static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2860{
2861 struct ip_proxy_args *args = data;
2862
2863 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2864 return -1;
2865}
2866
2867static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2868{
2869 struct ip_proxy_args *args = data;
2870
2871 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2872 return -1;
2873}
2874
2875static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2876{
2877 int ret;
2878 char cmd_output[PATH_MAX];
2879 struct ip_proxy_args args = {
2880 .ip = ip,
2881 .dev = dev,
2882 };
2883
2884 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2885 if (ret < 0) {
2886 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2887 return -1;
2888 }
2889
2890 return 0;
2891}
2892
2893static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2894{
2895 int ret;
2896 char cmd_output[PATH_MAX];
2897 struct ip_proxy_args args = {
2898 .ip = ip,
2899 .dev = dev,
2900 };
2901
2902 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2903 if (ret < 0) {
2904 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2905 return -1;
2906 }
2907
2908 return 0;
2909}
2910
2911static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2912 struct lxc_list *cur, *next;
2913 struct lxc_inetdev *inet4dev;
2914 struct lxc_inet6dev *inet6dev;
2915 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2916 int err = 0;
2917 unsigned int lo_ifindex = 0;
6509154d 2918
2919 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2920 if (!lxc_list_empty(&netdev->ipv4)) {
2921 /* Check for net.ipv4.conf.[link].forwarding=1 */
2922 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2923 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2924 return minus_one_set_errno(EINVAL);
2925 }
2926 }
2927
2928 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2929 if (!lxc_list_empty(&netdev->ipv6)) {
2930 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2931 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2932 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2933 return minus_one_set_errno(EINVAL);
2934 }
2935
2936 /* Check for net.ipv6.conf.[link].forwarding=1 */
2937 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2938 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2939 return minus_one_set_errno(EINVAL);
2940 }
2941 }
2942
b670016a 2943 /* Perform IPVLAN specific checks. */
2944 if (netdev->type == LXC_NET_IPVLAN) {
2945 /* Check mode is l3s as other modes do not work with l2proxy. */
2946 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2947 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2948 return minus_one_set_errno(EINVAL);
2949 }
2950
2951 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 2952 lo_ifindex = if_nametoindex(loop_device);
b670016a 2953 if (lo_ifindex == 0) {
3ebffb98 2954 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 2955 return minus_one_set_errno(EINVAL);
2956 }
2957 }
2958
6509154d 2959 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2960 inet4dev = cur->elem;
2961 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2962 return minus_one_set_errno(-errno);
2963
2964 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2965 return minus_one_set_errno(EINVAL);
b670016a 2966
2967 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2968 if (netdev->type == LXC_NET_IPVLAN) {
2969 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2970 if (err < 0) {
3ebffb98 2971 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 2972 return minus_one_set_errno(-err);
2973 }
2974 }
6509154d 2975 }
2976
2977 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2978 inet6dev = cur->elem;
2979 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2980 return minus_one_set_errno(-errno);
2981
2982 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2983 return minus_one_set_errno(EINVAL);
b670016a 2984
2985 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2986 if (netdev->type == LXC_NET_IPVLAN) {
2987 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2988 if (err < 0) {
3ebffb98 2989 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 2990 return minus_one_set_errno(-err);
2991 }
2992 }
6509154d 2993 }
2994
2995 return 0;
2996}
2997
b670016a 2998static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
2999 char bufinet4[INET_ADDRSTRLEN];
3000 unsigned int errCount = 0;
3001
3002 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4))) {
3003 SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
3004 return minus_one_set_errno(EINVAL);
3005 }
3006
3007 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3008 if (lo_ifindex > 0) {
3009 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
3010 errCount++;
3011 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
3012 }
3013 }
3014
3015 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3016 if (link[0] != '\0') {
3017 if (lxc_del_ip_neigh_proxy(bufinet4, link) < 0)
3018 errCount++;
3019 }
3020
3021 if (errCount > 0)
3022 return minus_one_set_errno(EINVAL);
3023
3024 return 0;
3025}
3026
3027static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
3028 char bufinet6[INET6_ADDRSTRLEN];
3029 unsigned int errCount = 0;
3030
3031 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6))) {
3032 SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
3033 return minus_one_set_errno(EINVAL);
3034 }
3035
3036 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3037 if (lo_ifindex > 0) {
3038 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
3039 errCount++;
3040 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3041 }
3042 }
3043
3044 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3045 if (link[0] != '\0') {
3046 if (lxc_del_ip_neigh_proxy(bufinet6, link) < 0)
3047 errCount++;
3048 }
3049
3050 if (errCount > 0)
3051 return minus_one_set_errno(EINVAL);
3052
3053 return 0;
3054}
3055
6509154d 3056static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3057 unsigned int lo_ifindex = 0;
3058 unsigned int errCount = 0;
6509154d 3059 struct lxc_list *cur, *next;
3060 struct lxc_inetdev *inet4dev;
3061 struct lxc_inet6dev *inet6dev;
6509154d 3062
b670016a 3063 /* Perform IPVLAN specific checks. */
3064 if (netdev->type == LXC_NET_IPVLAN) {
3065 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3066 lo_ifindex = if_nametoindex(loop_device);
b670016a 3067 if (lo_ifindex == 0) {
3068 errCount++;
3ebffb98 3069 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3070 }
b670016a 3071 }
6509154d 3072
b670016a 3073 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3074 inet4dev = cur->elem;
3075 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3076 errCount++;
6509154d 3077 }
3078
3079 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3080 inet6dev = cur->elem;
b670016a 3081 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3082 errCount++;
6509154d 3083 }
3084
b670016a 3085 if (errCount > 0)
6509154d 3086 return minus_one_set_errno(EINVAL);
3087
3088 return 0;
3089}
3090
e389f2af 3091static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3092{
811ef482
CB
3093 struct lxc_list *iterator;
3094 struct lxc_list *network = &handler->conf->network;
3095
811ef482
CB
3096 lxc_list_for_each(iterator, network) {
3097 struct lxc_netdev *netdev = iterator->elem;
3098
3099 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3100 ERROR("Invalid network configuration type %d", netdev->type);
3101 return -1;
3102 }
3103
6509154d 3104 /* Setup l2proxy entries if enabled and used with a link property */
3105 if (netdev->l2proxy && netdev->link[0] != '\0') {
3106 if (lxc_setup_l2proxy(netdev)) {
3107 ERROR("Failed to setup l2proxy");
3108 return -1;
3109 }
3110 }
3111
811ef482
CB
3112 if (netdev_conf[netdev->type](handler, netdev)) {
3113 ERROR("Failed to create network device");
3114 return -1;
3115 }
811ef482
CB
3116 }
3117
3118 return 0;
3119}
3120
e389f2af 3121int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3122{
e389f2af
CB
3123 pid_t pid = handler->pid;
3124 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3125 struct lxc_list *iterator;
3126
e0010464 3127 if (am_guest_unpriv())
74c6e2b0 3128 return 0;
811ef482
CB
3129
3130 lxc_list_for_each(iterator, network) {
e389f2af
CB
3131 int ret;
3132 char ifname[IFNAMSIZ];
811ef482
CB
3133 struct lxc_netdev *netdev = iterator->elem;
3134
811ef482
CB
3135 if (!netdev->ifindex)
3136 continue;
3137
3138 /* retrieve the name of the interface */
3139 if (!if_indextoname(netdev->ifindex, ifname)) {
3140 ERROR("No interface corresponding to ifindex \"%d\"",
3141 netdev->ifindex);
3142 return -1;
3143 }
3144
535e8859
CB
3145 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3146 if (ret) {
6d1400b5 3147 errno = -ret;
e389f2af
CB
3148 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3149 ifname, pid);
811ef482
CB
3150 return -1;
3151 }
3152
e389f2af
CB
3153 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3154
3155 DEBUG("Moved network device \"%s\" to network namespace of %d",
3156 netdev->created_name, pid);
811ef482
CB
3157 }
3158
3159 return 0;
3160}
3161
e389f2af 3162static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3163{
e389f2af
CB
3164 int hooks_version = handler->conf->hooks_version;
3165 const char *lxcname = handler->name;
3166 const char *lxcpath = handler->lxcpath;
3167 struct lxc_list *network = &handler->conf->network;
3168 pid_t pid = handler->pid;
74c6e2b0
CB
3169 struct lxc_list *iterator;
3170
74c6e2b0
CB
3171 lxc_list_for_each(iterator, network) {
3172 struct lxc_netdev *netdev = iterator->elem;
3173
3174 if (netdev->type == LXC_NET_EMPTY)
3175 continue;
3176
3177 if (netdev->type == LXC_NET_NONE)
3178 continue;
3179
3180 if (netdev->type != LXC_NET_VETH) {
e389f2af 3181 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3182 lxc_net_type_to_str(netdev->type));
3183 return -1;
3184 }
3185
3186 if (netdev->mtu)
3187 INFO("mtu ignored due to insufficient privilege");
3188
e389f2af
CB
3189 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3190 pid, hooks_version))
74c6e2b0
CB
3191 return -1;
3192 }
3193
3194 return 0;
3195}
3196
1bd8d726 3197bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3198{
3199 int ret;
3200 struct lxc_list *iterator;
3201 struct lxc_list *network = &handler->conf->network;
1bd8d726 3202
811ef482
CB
3203 lxc_list_for_each(iterator, network) {
3204 char *hostveth = NULL;
3205 struct lxc_netdev *netdev = iterator->elem;
3206
3207 /* We can only delete devices whose ifindex we have. If we don't
3208 * have the index it means that we didn't create it.
3209 */
3210 if (!netdev->ifindex)
3211 continue;
3212
6509154d 3213 /* Delete l2proxy entries if enabled and used with a link property */
3214 if (netdev->l2proxy && netdev->link[0] != '\0') {
3215 if (lxc_delete_l2proxy(netdev))
3216 WARN("Failed to delete all l2proxy config");
3217 /* Don't return, let the network be cleaned up as normal. */
3218 }
3219
811ef482
CB
3220 if (netdev->type == LXC_NET_PHYS) {
3221 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3222 if (ret < 0)
3223 WARN("Failed to rename interface with index %d "
b809f232
CB
3224 "from \"%s\" to its initial name \"%s\"",
3225 netdev->ifindex, netdev->name, netdev->link);
0b154989 3226 else {
29589196
CB
3227 TRACE("Renamed interface with index %d from "
3228 "\"%s\" to its initial name \"%s\"",
3229 netdev->ifindex, netdev->name,
3230 netdev->link);
0b154989
TP
3231
3232 /* Restore original MTU */
3233 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3234 if (ret < 0) {
3235 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3236 netdev->link, netdev->priv.phys_attr.mtu);
3237 } else {
3238 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3239 netdev->link, netdev->priv.phys_attr.mtu);
3240 }
3241 }
b3259dc6
TP
3242
3243 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3244 goto clear_ifindices;
811ef482
CB
3245 }
3246
3247 ret = netdev_deconf[netdev->type](handler, netdev);
3248 if (ret < 0)
3249 WARN("Failed to deconfigure network device");
3250
3251 /* Recent kernels remove the virtual interfaces when the network
3252 * namespace is destroyed but in case we did not move the
3253 * interface to the network namespace, we have to destroy it.
3254 */
1bd8d726 3255 ret = lxc_netdev_delete_by_index(netdev->ifindex);
78ab281c
CB
3256 if (ret < 0) {
3257 if (errno != ENODEV) {
3258 WARN("Failed to remove interface \"%s\" with index %d",
3259 netdev->name[0] != '\0' ? netdev->name : "(null)",
3260 netdev->ifindex);
3261 goto clear_ifindices;
3262 }
3263 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
24548539
CB
3264 netdev->name[0] != '\0' ? netdev->name : "(null)",
3265 netdev->ifindex);
811ef482 3266 }
1bd8d726 3267 INFO("Removed interface \"%s\" with index %d",
52845118
CB
3268 netdev->name[0] != '\0' ? netdev->name : "(null)",
3269 netdev->ifindex);
811ef482
CB
3270
3271 if (netdev->type != LXC_NET_VETH)
66a7c406 3272 goto clear_ifindices;
811ef482 3273
811ef482
CB
3274 /* Explicitly delete host veth device to prevent lingering
3275 * devices. We had issues in LXD around this.
3276 */
de4855a8 3277 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3278 hostveth = netdev->priv.veth_attr.pair;
3279 else
3280 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3281 if (hostveth[0] == '\0')
66a7c406 3282 goto clear_ifindices;
811ef482
CB
3283
3284 ret = lxc_netdev_delete_by_name(hostveth);
3285 if (ret < 0) {
24548539
CB
3286 WARN("Failed to remove interface \"%s\" from \"%s\"",
3287 hostveth, netdev->link);
66a7c406 3288 goto clear_ifindices;
811ef482
CB
3289 }
3290 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3291
c869be20 3292 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3293 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3294 netdev->ifindex = 0;
3295 netdev->priv.veth_attr.ifindex = 0;
3296 goto clear_ifindices;
811ef482
CB
3297 }
3298
3299 /* Delete the openvswitch port. */
3300 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3301 if (ret < 0)
3302 WARN("Failed to remove port \"%s\" from openvswitch "
3303 "bridge \"%s\"", hostveth, netdev->link);
3304 else
3305 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3306 hostveth, netdev->link);
3307
66a7c406 3308clear_ifindices:
ad2ddfcd 3309 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3310 * have cached stale data which would cause it to fail on reboot
3311 * we're we don't re-read the on-disk config file.
3312 */
3313 netdev->ifindex = 0;
3314 if (netdev->type == LXC_NET_PHYS) {
3315 netdev->priv.phys_attr.ifindex = 0;
3316 } else if (netdev->type == LXC_NET_VETH) {
3317 netdev->priv.veth_attr.veth1[0] = '\0';
3318 netdev->priv.veth_attr.ifindex = 0;
3319 }
811ef482
CB
3320 }
3321
bb84beda 3322 return true;
811ef482
CB
3323}
3324
3325int lxc_requests_empty_network(struct lxc_handler *handler)
3326{
3327 struct lxc_list *network = &handler->conf->network;
3328 struct lxc_list *iterator;
3329 bool found_none = false, found_nic = false;
3330
3331 if (lxc_list_empty(network))
3332 return 0;
3333
3334 lxc_list_for_each(iterator, network) {
3335 struct lxc_netdev *netdev = iterator->elem;
3336
3337 if (netdev->type == LXC_NET_NONE)
3338 found_none = true;
3339 else
3340 found_nic = true;
3341 }
3342 if (found_none && !found_nic)
3343 return 1;
3344 return 0;
3345}
3346
3347/* try to move physical nics to the init netns */
b809f232 3348int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3349{
3350 int ret;
b809f232 3351 int oldfd;
811ef482 3352 char ifname[IFNAMSIZ];
b809f232 3353 struct lxc_list *iterator;
28d9e29e 3354 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3355 struct lxc_conf *conf = handler->conf;
811ef482 3356
b809f232
CB
3357 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3358 * the parent network namespace. We won't have this capability if we are
3359 * unprivileged.
3360 */
d0fbc7ba 3361 if (!handler->am_root)
b809f232 3362 return 0;
811ef482 3363
b809f232 3364 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3365
0037ab49 3366 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3367 if (oldfd < 0) {
3368 SYSERROR("Failed to preserve network namespace");
b809f232 3369 return -1;
811ef482
CB
3370 }
3371
b809f232 3372 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3373 if (ret < 0) {
3374 SYSERROR("Failed to enter network namespace");
3375 close(oldfd);
b809f232 3376 return -1;
811ef482
CB
3377 }
3378
b809f232
CB
3379 lxc_list_for_each(iterator, &conf->network) {
3380 struct lxc_netdev *netdev = iterator->elem;
811ef482 3381
b809f232
CB
3382 if (netdev->type != LXC_NET_PHYS)
3383 continue;
3384
3385 /* Retrieve the name of the interface in the container's network
3386 * namespace.
3387 */
3388 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3389 WARN("No interface corresponding to ifindex %d",
b809f232 3390 netdev->ifindex);
811ef482
CB
3391 continue;
3392 }
b809f232 3393
0037ab49 3394 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3395 if (ret < 0)
811ef482
CB
3396 WARN("Error moving network device \"%s\" back to "
3397 "network namespace", ifname);
b809f232
CB
3398 else
3399 TRACE("Moved network device \"%s\" back to network "
3400 "namespace", ifname);
811ef482 3401 }
811ef482 3402
b809f232 3403 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3404 close(oldfd);
b809f232
CB
3405 if (ret < 0) {
3406 SYSERROR("Failed to enter network namespace");
3407 return -1;
3408 }
3409
3410 return 0;
811ef482
CB
3411}
3412
3413static int setup_hw_addr(char *hwaddr, const char *ifname)
3414{
3415 struct sockaddr sockaddr;
3416 struct ifreq ifr;
6d1400b5 3417 int ret, fd;
811ef482
CB
3418
3419 ret = lxc_convert_mac(hwaddr, &sockaddr);
3420 if (ret) {
6d1400b5 3421 errno = -ret;
3422 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3423 return -1;
3424 }
3425
3426 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3427 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3428 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3429
ad9429e5 3430 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3431 if (fd < 0)
3432 return -1;
3433
3434 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3435 if (ret)
6d1400b5 3436 SYSERROR("Failed to perform ioctl");
3437
3438 close(fd);
811ef482
CB
3439
3440 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3441 ifr.ifr_name);
3442
3443 return ret;
3444}
3445
3446static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3447{
3448 struct lxc_list *iterator;
3449 int err;
3450
3451 lxc_list_for_each(iterator, ip) {
3452 struct lxc_inetdev *inetdev = iterator->elem;
3453
3454 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3455 &inetdev->bcast, inetdev->prefix);
3456 if (err) {
6d1400b5 3457 errno = -err;
3458 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3459 "with ifindex %d", ifindex);
811ef482
CB
3460 return -1;
3461 }
3462 }
3463
3464 return 0;
3465}
3466
3467static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3468{
3469 struct lxc_list *iterator;
3470 int err;
3471
3472 lxc_list_for_each(iterator, ip) {
3473 struct lxc_inet6dev *inet6dev = iterator->elem;
3474
3475 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3476 &inet6dev->mcast, &inet6dev->acast,
3477 inet6dev->prefix);
3478 if (err) {
6d1400b5 3479 errno = -err;
3480 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3481 "with ifindex %d", ifindex);
811ef482
CB
3482 return -1;
3483 }
3484 }
3485
3486 return 0;
3487}
3488
3489static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3490{
3491 char ifname[IFNAMSIZ];
3492 int err;
811ef482 3493 char *current_ifname = ifname;
009d6127 3494 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3495
3496 /* empty network namespace */
3497 if (!netdev->ifindex) {
3498 if (netdev->flags & IFF_UP) {
3499 err = lxc_netdev_up("lo");
3500 if (err) {
6d1400b5 3501 errno = -err;
3502 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3503 return -1;
3504 }
3505 }
3506
3507 if (netdev->type == LXC_NET_EMPTY)
3508 return 0;
3509
3510 if (netdev->type == LXC_NET_NONE)
3511 return 0;
3512
e389f2af
CB
3513 netdev->ifindex = if_nametoindex(netdev->created_name);
3514 if (!netdev->ifindex)
3515 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3516 netdev->name ?: "(null)");
811ef482
CB
3517 }
3518
3519 /* get the new ifindex in case of physical netdev */
3520 if (netdev->type == LXC_NET_PHYS) {
3521 netdev->ifindex = if_nametoindex(netdev->link);
3522 if (!netdev->ifindex) {
3523 ERROR("Failed to get ifindex for network device \"%s\"",
3524 netdev->link);
3525 return -1;
3526 }
3527 }
3528
3529 /* retrieve the name of the interface */
3530 if (!if_indextoname(netdev->ifindex, current_ifname)) {
e389f2af
CB
3531 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3532 netdev->ifindex);
811ef482
CB
3533 return -1;
3534 }
3535
e389f2af 3536 /* Default: let the system choose an interface name.
811ef482
CB
3537 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3538 * netlink will replace the format specifier with an appropriate index.
3539 */
de4855a8
CB
3540 if (netdev->name[0] == '\0') {
3541 if (netdev->type == LXC_NET_PHYS)
94b1cade 3542 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3543 else
94b1cade 3544 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3545 }
811ef482
CB
3546
3547 /* rename the interface name */
e389f2af
CB
3548 if (strcmp(current_ifname, netdev->name) != 0) {
3549 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
811ef482 3550 if (err) {
6d1400b5 3551 errno = -err;
3552 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
e389f2af 3553 current_ifname, netdev->name);
811ef482
CB
3554 return -1;
3555 }
e389f2af
CB
3556
3557 TRACE("Renamed network device from \"%s\" to \"%s\"",
3558 current_ifname, netdev->name);
811ef482
CB
3559 }
3560
3561 /* Re-read the name of the interface because its name has changed
3562 * and would be automatically allocated by the system
3563 */
3564 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3565 ERROR("Failed get name for network device with ifindex %d",
3566 netdev->ifindex);
3567 return -1;
3568 }
3569
790255cf
CB
3570 /* Now update the recorded name of the network device to reflect the
3571 * name of the network device in the child's network namespace. We will
3572 * later on send this information back to the parent.
3573 */
94b1cade 3574 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3575
811ef482
CB
3576 /* set a mac address */
3577 if (netdev->hwaddr) {
3578 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3579 ERROR("Failed to setup hw address for network device \"%s\"",
3580 current_ifname);
3581 return -1;
3582 }
3583 }
3584
3585 /* setup ipv4 addresses on the interface */
3586 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3587 ERROR("Failed to setup ip addresses for network device \"%s\"",
e389f2af 3588 current_ifname);
811ef482
CB
3589 return -1;
3590 }
3591
3592 /* setup ipv6 addresses on the interface */
3593 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3594 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
e389f2af 3595 current_ifname);
811ef482
CB
3596 return -1;
3597 }
3598
3599 /* set the network device up */
3600 if (netdev->flags & IFF_UP) {
811ef482
CB
3601 err = lxc_netdev_up(current_ifname);
3602 if (err) {
6d1400b5 3603 errno = -err;
3604 SYSERROR("Failed to set network device \"%s\" up",
3605 current_ifname);
811ef482
CB
3606 return -1;
3607 }
3608
3609 /* the network is up, make the loopback up too */
3610 err = lxc_netdev_up("lo");
3611 if (err) {
6d1400b5 3612 errno = -err;
3613 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3614 return -1;
3615 }
3616 }
3617
811ef482 3618 /* setup ipv4 gateway on the interface */
a2f9a670 3619 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3620 if (!(netdev->flags & IFF_UP)) {
3621 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3622 "\"%s\" when not bringing up the interface", current_ifname);
811ef482
CB
3623 return -1;
3624 }
3625
3626 if (lxc_list_empty(&netdev->ipv4)) {
3627 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3628 "\"%s\" when not assigning an address", current_ifname);
811ef482
CB
3629 return -1;
3630 }
3631
a2f9a670 3632 /* Setup device route if ipv4_gateway_dev is enabled */
3633 if (netdev->ipv4_gateway_dev) {
3634 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3635 if (err < 0) {
3636 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
e389f2af 3637 current_ifname);
a2f9a670 3638 return minus_one_set_errno(-err);
811ef482 3639 }
a2f9a670 3640 } else {
009d6127 3641 /* Check the gateway address is valid */
3642 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3643 return minus_one_set_errno(errno);
3644
3645 /* Try adding a default route to the gateway address */
811ef482 3646 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3647 if (err < 0) {
3648 /* If adding the default route fails, this could be because the
3649 * gateway address is in a different subnet to the container's address.
3650 * To work around this, we try adding a static device route to the
3651 * gateway address first, and then try again.
3652 */
a2f9a670 3653 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3654 if (err < 0) {
a2f9a670 3655 errno = -err;
009d6127 3656 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
e389f2af 3657 bufinet4, current_ifname);
009d6127 3658 return -1;
a2f9a670 3659 }
6d1400b5 3660
a2f9a670 3661 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3662 if (err < 0) {
a2f9a670 3663 errno = -err;
009d6127 3664 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
e389f2af 3665 bufinet4, current_ifname);
a2f9a670 3666 return -1;
811ef482 3667 }
811ef482
CB
3668 }
3669 }
3670 }
3671
3672 /* setup ipv6 gateway on the interface */
a2f9a670 3673 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3674 if (!(netdev->flags & IFF_UP)) {
e389f2af
CB
3675 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3676 current_ifname);
811ef482
CB
3677 return -1;
3678 }
3679
3680 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af
CB
3681 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3682 current_ifname);
811ef482
CB
3683 return -1;
3684 }
3685
a2f9a670 3686 /* Setup device route if ipv6_gateway_dev is enabled */
3687 if (netdev->ipv6_gateway_dev) {
3688 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3689 if (err < 0) {
3690 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
e389f2af 3691 current_ifname);
a2f9a670 3692 return minus_one_set_errno(-err);
811ef482 3693 }
a2f9a670 3694 } else {
009d6127 3695 /* Check the gateway address is valid */
3696 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3697 return minus_one_set_errno(errno);
3698
3699 /* Try adding a default route to the gateway address */
811ef482 3700 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3701 if (err < 0) {
3702 /* If adding the default route fails, this could be because the
3703 * gateway address is in a different subnet to the container's address.
3704 * To work around this, we try adding a static device route to the
3705 * gateway address first, and then try again.
3706 */
a2f9a670 3707 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3708 if (err < 0) {
a2f9a670 3709 errno = -err;
009d6127 3710 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
e389f2af 3711 bufinet6, current_ifname);
009d6127 3712 return -1;
a2f9a670 3713 }
6d1400b5 3714
a2f9a670 3715 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3716 if (err < 0) {
a2f9a670 3717 errno = -err;
009d6127 3718 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
e389f2af 3719 bufinet6, current_ifname);
a2f9a670 3720 return -1;
811ef482 3721 }
811ef482
CB
3722 }
3723 }
3724 }
3725
74c6e2b0 3726 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3727
3728 return 0;
3729}
3730
3731int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3732 struct lxc_list *network)
3733{
3734 struct lxc_list *iterator;
811ef482 3735
811ef482 3736 lxc_list_for_each(iterator, network) {
e389f2af 3737 struct lxc_netdev *netdev = iterator->elem;
811ef482 3738
811ef482 3739 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
e389f2af 3740 ERROR("Failed to setup netdev");
811ef482
CB
3741 return -1;
3742 }
3743 }
3744
3745 if (!lxc_list_empty(network))
e389f2af 3746 INFO("Network has been setup");
811ef482
CB
3747
3748 return 0;
3749}
7ab1ba02
CB
3750
3751int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3752{
3753 struct lxc_list *iterator;
3754 struct lxc_list *network = &handler->conf->network;
3755 int data_sock = handler->data_sock[0];
3756
7ab1ba02
CB
3757 lxc_list_for_each(iterator, network) {
3758 int ret;
3759 struct lxc_netdev *netdev = iterator->elem;
3760
3761 if (netdev->type != LXC_NET_VETH)
3762 continue;
3763
7fbb15ec 3764 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3765 if (ret < 0)
7ab1ba02 3766 return -1;
e389f2af
CB
3767
3768 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3769 if (ret < 0)
3770 return -1;
3771
3772 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3773 }
3774
3775 return 0;
3776}
3777
3778int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3779{
3780 struct lxc_list *iterator;
3781 struct lxc_list *network = &handler->conf->network;
3782 int data_sock = handler->data_sock[1];
3783
7ab1ba02
CB
3784 lxc_list_for_each(iterator, network) {
3785 int ret;
3786 struct lxc_netdev *netdev = iterator->elem;
3787
3788 if (netdev->type != LXC_NET_VETH)
3789 continue;
3790
e3233f26 3791 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3792 if (ret < 0)
7ab1ba02 3793 return -1;
e389f2af
CB
3794
3795 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3796 if (ret < 0)
3797 return -1;
3798 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3799 }
3800
3801 return 0;
3802}
a1ae535a
CB
3803
3804int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3805{
3806 struct lxc_list *iterator, *network;
3807 int data_sock = handler->data_sock[0];
3808
3809 if (!handler->am_root)
3810 return 0;
3811
3812 network = &handler->conf->network;
3813 lxc_list_for_each(iterator, network) {
3814 int ret;
3815 struct lxc_netdev *netdev = iterator->elem;
3816
3817 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3818 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3819 if (ret < 0)
7729f8e5 3820 return -1;
a1ae535a
CB
3821
3822 /* Send network device ifindex in the child's namespace to
3823 * parent.
3824 */
7fbb15ec 3825 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3826 if (ret < 0)
7729f8e5 3827 return -1;
a1ae535a
CB
3828 }
3829
e389f2af
CB
3830 if (!lxc_list_empty(network))
3831 TRACE("Sent network device names and ifindices to parent");
3832
a1ae535a 3833 return 0;
a1ae535a
CB
3834}
3835
3836int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3837{
3838 struct lxc_list *iterator, *network;
3839 int data_sock = handler->data_sock[1];
3840
3841 if (!handler->am_root)
3842 return 0;
3843
3844 network = &handler->conf->network;
3845 lxc_list_for_each(iterator, network) {
3846 int ret;
3847 struct lxc_netdev *netdev = iterator->elem;
3848
3849 /* Receive network device name in the child's namespace to
3850 * parent.
3851 */
e3233f26 3852 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3853 if (ret < 0)
7729f8e5 3854 return -1;
a1ae535a
CB
3855
3856 /* Receive network device ifindex in the child's namespace to
3857 * parent.
3858 */
e3233f26 3859 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3860 if (ret < 0)
7729f8e5 3861 return -1;
a1ae535a
CB
3862 }
3863
3864 return 0;
a1ae535a 3865}
bb84beda
CB
3866
3867void lxc_delete_network(struct lxc_handler *handler)
3868{
3869 bool bret;
3870
3871 if (handler->am_root)
3872 bret = lxc_delete_network_priv(handler);
3873 else
3874 bret = lxc_delete_network_unpriv(handler);
3875 if (!bret)
3876 DEBUG("Failed to delete network devices");
3877 else
3878 DEBUG("Deleted network devices");
3879}
1cd95214 3880
1cd95214
CB
3881int lxc_netns_set_nsid(int fd)
3882{
41a3300d 3883 int ret;
0ce60f0d
CB
3884 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3885 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3886 NLMSG_ALIGN(1024)];
1cd95214 3887 struct nl_handler nlh;
0ce60f0d
CB
3888 struct nlmsghdr *hdr;
3889 struct rtgenmsg *msg;
bfcedc7e 3890 int saved_errno;
9d036caa
CB
3891 const __s32 ns_id = -1;
3892 const __u32 netns_fd = fd;
1cd95214
CB
3893
3894 ret = netlink_open(&nlh, NETLINK_ROUTE);
3895 if (ret < 0)
41a3300d 3896 return -1;
1cd95214 3897
0ce60f0d 3898 memset(buf, 0, sizeof(buf));
6ce39620
CB
3899
3900#pragma GCC diagnostic push
3901#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3902 hdr = (struct nlmsghdr *)buf;
3903 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3904#pragma GCC diagnostic pop
1cd95214 3905
0ce60f0d
CB
3906 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3907 hdr->nlmsg_type = RTM_NEWNSID;
3908 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3909 hdr->nlmsg_pid = 0;
3910 hdr->nlmsg_seq = RTM_NEWNSID;
3911 msg->rtgen_family = AF_UNSPEC;
1cd95214 3912
9d036caa
CB
3913 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3914 if (ret < 0)
3915 goto on_error;
3916
3917 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3918 if (ret < 0)
3919 goto on_error;
1cd95214 3920
9fbbc427 3921 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3922
3923on_error:
bfcedc7e 3924 saved_errno = errno;
1cd95214 3925 netlink_close(&nlh);
bfcedc7e 3926 errno = saved_errno;
1cd95214 3927
9d036caa 3928 return ret;
1cd95214 3929}
938980ba
CB
3930
/* Index the routing attributes starting at @rta (@len bytes) by type.
 *
 * @tb must have room for @max + 1 entries; it is zeroed first. For every
 * attribute type not larger than @max the first occurrence is stored in
 * tb[type], later duplicates are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	struct rtattr *cur = rta;
	int remaining = len;

	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(cur, remaining); cur = RTA_NEXT(cur, remaining)) {
		unsigned short type = cur->rta_type;

		/* Skip types we have no slot for and keep the first match. */
		if (type > max || tb[type])
			continue;

		tb[type] = cur;
	}
#pragma GCC diagnostic pop

	return 0;
}
3950
/* Return the payload of @rta interpreted as a signed 32 bit integer.
 *
 * The value is copied out with memcpy() so the read places no alignment
 * requirement on the payload and needs no const-discarding pointer cast.
 * The caller must make sure the attribute carries at least 4 bytes of data.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3955
/* Address of the first routing attribute following the struct rtgenmsg at @r.
 * Only defined here if no header has provided it already.
 */
#if !defined(NETNS_RTA)
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3960
3961int lxc_netns_get_nsid(int fd)
3962{
3963 int ret;
3964 ssize_t len;
3965 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3966 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3967 NLMSG_ALIGN(1024)];
938980ba
CB
3968 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3969 struct nl_handler nlh;
3970 struct nlmsghdr *hdr;
3971 struct rtgenmsg *msg;
3972 int saved_errno;
3973 __u32 netns_fd = fd;
3974
3975 ret = netlink_open(&nlh, NETLINK_ROUTE);
3976 if (ret < 0)
3977 return -1;
3978
3979 memset(buf, 0, sizeof(buf));
6ce39620
CB
3980
3981#pragma GCC diagnostic push
3982#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3983 hdr = (struct nlmsghdr *)buf;
3984 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3985#pragma GCC diagnostic pop
938980ba
CB
3986
3987 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3988 hdr->nlmsg_type = RTM_GETNSID;
3989 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3990 hdr->nlmsg_pid = 0;
3991 hdr->nlmsg_seq = RTM_GETNSID;
3992 msg->rtgen_family = AF_UNSPEC;
3993
9d036caa
CB
3994 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3995 if (ret == 0)
3996 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 3997
938980ba
CB
3998 saved_errno = errno;
3999 netlink_close(&nlh);
4000 errno = saved_errno;
4001 if (ret < 0)
4002 return -1;
4003
9d036caa 4004 errno = EINVAL;
938980ba
CB
4005 msg = NLMSG_DATA(hdr);
4006 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4007 if (len < 0)
4008 return -1;
4009
6ce39620
CB
4010#pragma GCC diagnostic push
4011#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4012 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4013 if (tb[__LXC_NETNSA_NSID])
4014 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4015#pragma GCC diagnostic pop
938980ba
CB
4016
4017 return -1;
4018}
e389f2af
CB
4019
4020int lxc_create_network(struct lxc_handler *handler)
4021{
4022 int ret;
4023
4024 /*
4025 * Find gateway addresses from the link device, which is no longer
4026 * accessible inside the container. Do this before creating network
4027 * interfaces, since goto out_delete_net does not work before
4028 * lxc_clone.
4029 */
4030 ret = lxc_find_gateway_addresses(handler);
4031 if (ret) {
4032 ERROR("Failed to find gateway addresses");
4033 return -1;
4034 }
4035
4036 if (handler->am_root) {
4037 ret = lxc_create_network_priv(handler);
4038 if (ret)
4039 return -1;
4040
4041 return lxc_network_move_created_netdev_priv(handler);
4042 }
4043
4044 return lxc_create_network_unpriv(handler);
4045}