]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: stash created_name in instantiate_macvlan()
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
27#include <arpa/inet.h>
cb0dc11b
CB
28#include <ctype.h>
29#include <errno.h>
30#include <fcntl.h>
0ad19a3f 31#include <linux/netlink.h>
32#include <linux/rtnetlink.h>
33#include <linux/sockios.h>
cb0dc11b
CB
34#include <net/ethernet.h>
35#include <net/if.h>
36#include <net/if_arp.h>
37#include <netinet/in.h>
d38dd64a
CB
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
cb0dc11b
CB
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
d38dd64a
CB
47#include <time.h>
48#include <unistd.h>
f549edcc 49
d38dd64a 50#include "../include/netns_ifaddrs.h"
7ab1ba02 51#include "af_unix.h"
72d0e1cb 52#include "conf.h"
811ef482 53#include "config.h"
e3233f26 54#include "file_utils.h"
cb0dc11b 55#include "log.h"
8335fd40 56#include "macro.h"
95ea3d1f 57#include "memory_utils.h"
cb0dc11b
CB
58#include "network.h"
59#include "nl.h"
d7b58715 60#include "raw_syscalls.h"
59524108 61#include "syscall_wrappers.h"
0d204771 62#include "utils.h"
0ad19a3f 63
9de31d5a
CB
64#ifndef HAVE_STRLCPY
65#include "include/strlcpy.h"
66#endif
67
ac2cecc4 68lxc_log_define(network, lxc);
f8fee0e2 69
811ef482 70typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
3ebffb98 71static const char loop_device[] = "lo";
811ef482 72
b670016a 73static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 74{
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121}
122
/* Add an IPv4 route to @dest/@netmask on the device @ifindex. Returns whatever
 * lxc_ip_route_dest() returns.
 */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
127
/* Add an IPv6 route to @dest/@netmask on the device @ifindex. Returns whatever
 * lxc_ip_route_dest() returns.
 */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
132
/* Remove the IPv4 route to @dest/@netmask from the device @ifindex. Returns
 * whatever lxc_ip_route_dest() returns.
 */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
137
/* Remove the IPv6 route to @dest/@netmask from the device @ifindex. Returns
 * whatever lxc_ip_route_dest() returns.
 */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
142
d4a7da46 143static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144{
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160}
161
162static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163{
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179}
180
811ef482
CB
181static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182{
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
de4855a8 188 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
d34212ad
CB
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
811ef482
CB
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
6d1400b5 215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482
CB
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
6d1400b5 225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
227 goto out_delete;
228 }
229
8da62485
CB
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
811ef482
CB
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
de4855a8 253 } else if (netdev->link[0] != '\0') {
811ef482
CB
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
6d1400b5 268
811ef482 269 if (err) {
6d1400b5 270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
811ef482
CB
273 goto out_delete;
274 }
275 }
276
de4855a8 277 if (netdev->link[0] != '\0') {
811ef482
CB
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
6d1400b5 280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
811ef482
CB
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
6d1400b5 290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
292 goto out_delete;
293 }
294
d4a7da46 295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
811ef482 307 if (netdev->upscript) {
14a7b0f9
CB
308 char *argv[] = {
309 "veth",
310 netdev->link,
990b9ac3 311 veth1,
14a7b0f9
CB
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
811ef482
CB
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
811ef482
CB
330 return -1;
331}
332
333static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334{
8021de25 335 char peer[IFNAMSIZ];
811ef482 336 int err;
3bef7b7b 337 unsigned int mtu = 0;
811ef482 338
de4855a8 339 if (netdev->link[0] == '\0') {
811ef482
CB
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
8021de25
CB
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
346 return -1;
347
8021de25 348 if (!lxc_mkifname(peer))
811ef482
CB
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
6d1400b5 354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
966e9f1f 357 goto on_error;
811ef482
CB
358 }
359
a9704f05
CB
360 strlcpy(netdev->created_name, peer, IFNAMSIZ);
361
811ef482
CB
362 netdev->ifindex = if_nametoindex(peer);
363 if (!netdev->ifindex) {
364 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 365 goto on_error;
811ef482
CB
366 }
367
3bef7b7b
TP
368 if (netdev->mtu) {
369 err = lxc_safe_uint(netdev->mtu, &mtu);
370 if (err < 0) {
371 errno = -err;
372 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
373 goto on_error;
374 }
375
376 err = lxc_netdev_set_mtu(peer, mtu);
377 if (err < 0) {
378 errno = -err;
379 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
380 goto on_error;
381 }
382 }
383
811ef482 384 if (netdev->upscript) {
14a7b0f9
CB
385 char *argv[] = {
386 "macvlan",
387 netdev->link,
388 NULL,
389 };
390
391 err = run_script_argv(handler->name,
392 handler->conf->hooks_version, "net",
393 netdev->upscript, "up", argv);
394 if (err < 0)
966e9f1f 395 goto on_error;
811ef482
CB
396 }
397
398 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
399 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
400
401 return 0;
966e9f1f
CB
402
403on_error:
811ef482 404 lxc_netdev_delete_by_name(peer);
811ef482
CB
405 return -1;
406}
407
c9f52382 408static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
409{
410 int err, index, len;
411 struct ifinfomsg *ifi;
412 struct nl_handler nlh;
413 struct rtattr *nest, *nest2;
414 struct nlmsg *answer = NULL, *nlmsg = NULL;
415
416 len = strlen(master);
417 if (len == 1 || len >= IFNAMSIZ)
418 return minus_one_set_errno(EINVAL);
419
420 len = strlen(name);
421 if (len == 1 || len >= IFNAMSIZ)
422 return minus_one_set_errno(EINVAL);
423
424 index = if_nametoindex(master);
425 if (!index)
426 return minus_one_set_errno(EINVAL);
427
428 err = netlink_open(&nlh, NETLINK_ROUTE);
429 if (err)
430 return minus_one_set_errno(-err);
431
432 err = -ENOMEM;
433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
434 if (!nlmsg)
435 goto out;
436
437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
438 if (!answer)
439 goto out;
440
441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
443
444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
445 if (!ifi) {
446 goto out;
447 }
448 ifi->ifi_family = AF_UNSPEC;
449
450 err = -EPROTO;
451 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
452 if (!nest)
453 goto out;
454
455 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
456 goto out;
457
458 if (mode) {
459 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
460 if (!nest2)
461 goto out;
462
463 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
464 goto out;
465
466 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
467 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
468 */
469 if (isolation > 0) {
470 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
471 goto out;
472 }
473
474 nla_end_nested(nlmsg, nest2);
475 }
476
477 nla_end_nested(nlmsg, nest);
478
479 if (nla_put_u32(nlmsg, IFLA_LINK, index))
480 goto out;
481
482 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
483 goto out;
484
485 err = netlink_transaction(&nlh, nlmsg, answer);
486out:
487 netlink_close(&nlh);
488 nlmsg_free(answer);
489 nlmsg_free(nlmsg);
490 if (err < 0)
491 return minus_one_set_errno(-err);
492 return 0;
493}
494
495static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
496{
497 char peerbuf[IFNAMSIZ], *peer;
498 int err;
006e135e 499 unsigned int mtu = 0;
c9f52382 500
501 if (netdev->link[0] == '\0') {
502 ERROR("No link for ipvlan network device specified");
503 return -1;
504 }
505
506 err = snprintf(peerbuf, sizeof(peerbuf), "ipXXXXXX");
507 if (err < 0 || (size_t)err >= sizeof(peerbuf))
508 return -1;
509
510 peer = lxc_mkifname(peerbuf);
511 if (!peer)
512 return -1;
513
514 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode, netdev->priv.ipvlan_attr.isolation);
515 if (err) {
516 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"", peer, netdev->link);
517 goto on_error;
518 }
519
520 netdev->ifindex = if_nametoindex(peer);
521 if (!netdev->ifindex) {
522 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
523 goto on_error;
524 }
525
006e135e 526 if (netdev->mtu) {
527 err = lxc_safe_uint(netdev->mtu, &mtu);
528 if (err < 0) {
529 errno = -err;
530 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
531 goto on_error;
532 }
533
534 err = lxc_netdev_set_mtu(peer, mtu);
535 if (err < 0) {
536 errno = -err;
537 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
538 goto on_error;
539 }
540 }
541
c9f52382 542 if (netdev->upscript) {
543 char *argv[] = {
544 "ipvlan",
545 netdev->link,
546 NULL,
547 };
548
549 err = run_script_argv(handler->name,
550 handler->conf->hooks_version, "net",
551 netdev->upscript, "up", argv);
552 if (err < 0)
553 goto on_error;
554 }
555
556 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d",
557 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
558
559 return 0;
560
561on_error:
562 lxc_netdev_delete_by_name(peer);
563 return -1;
564}
565
811ef482
CB
566static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
567{
568 char peer[IFNAMSIZ];
569 int err;
570 static uint16_t vlan_cntr = 0;
571 unsigned int mtu = 0;
572
de4855a8 573 if (netdev->link[0] == '\0') {
811ef482
CB
574 ERROR("No link for vlan network device specified");
575 return -1;
576 }
577
578 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
579 if (err < 0 || (size_t)err >= sizeof(peer))
580 return -1;
581
582 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
583 if (err) {
6d1400b5 584 errno = -err;
585 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
586 peer, netdev->link);
811ef482
CB
587 return -1;
588 }
589
590 netdev->ifindex = if_nametoindex(peer);
591 if (!netdev->ifindex) {
592 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 593 goto on_error;
594 }
595
596 if (netdev->mtu) {
597 err = lxc_safe_uint(netdev->mtu, &mtu);
598 if (err < 0) {
599 errno = -err;
600 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
601 goto on_error;
602 }
603
604 err = lxc_netdev_set_mtu(peer, mtu);
605 if (err) {
606 errno = -err;
607 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
608 goto on_error;
609 }
811ef482
CB
610 }
611
3a73d9f1 612 if (netdev->upscript) {
613 char *argv[] = {
614 "vlan",
615 netdev->link,
616 NULL,
617 };
618
619 err = run_script_argv(handler->name,
620 handler->conf->hooks_version, "net",
621 netdev->upscript, "up", argv);
19abca58 622 if (err < 0) {
3e2a7b08 623 goto on_error;
19abca58 624 }
3a73d9f1 625 }
626
3bef7b7b 627 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
811ef482 628 peer, netdev->ifindex);
811ef482
CB
629
630 return 0;
3e2a7b08 631
632on_error:
633 lxc_netdev_delete_by_name(peer);
634 return -1;
811ef482
CB
635}
636
637static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
638{
0b154989 639 int err, mtu_orig = 0;
3bef7b7b 640 unsigned int mtu = 0;
14a7b0f9 641
de4855a8 642 if (netdev->link[0] == '\0') {
811ef482
CB
643 ERROR("No link for physical interface specified");
644 return -1;
645 }
646
790255cf
CB
647 /* Note that we're retrieving the container's ifindex in the host's
648 * network namespace because we need it to move the device from the
649 * host's network namespace to the container's network namespace later
650 * on.
651 * Note that netdev->link will contain the name of the physical network
652 * device in the host's namespace.
653 */
811ef482
CB
654 netdev->ifindex = if_nametoindex(netdev->link);
655 if (!netdev->ifindex) {
656 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
657 return -1;
658 }
659
790255cf
CB
660 /* Store the ifindex of the host's network device in the host's
661 * namespace.
662 */
663 netdev->priv.phys_attr.ifindex = netdev->ifindex;
664
0b154989
TP
665 /* Get original device MTU setting and store for restoration after container shutdown. */
666 mtu_orig = netdev_get_mtu(netdev->ifindex);
667 if (mtu_orig < 0) {
668 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
669 return minus_one_set_errno(-mtu_orig);
670 }
671
672 netdev->priv.phys_attr.mtu = mtu_orig;
673
3bef7b7b
TP
674 if (netdev->mtu) {
675 err = lxc_safe_uint(netdev->mtu, &mtu);
676 if (err < 0) {
677 errno = -err;
678 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
679 return -1;
680 }
14a7b0f9 681
3bef7b7b
TP
682 err = lxc_netdev_set_mtu(netdev->link, mtu);
683 if (err < 0) {
684 errno = -err;
685 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
686 return -1;
687 }
688 }
689
690 if (netdev->upscript) {
691 char *argv[] = {
692 "phys",
693 netdev->link,
694 NULL,
695 };
696
697 err = run_script_argv(handler->name,
698 handler->conf->hooks_version, "net",
699 netdev->upscript, "up", argv);
700 if (err < 0) {
701 return -1;
702 }
703 }
704
705 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
811ef482
CB
706
707 return 0;
708}
709
710static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
711{
14a7b0f9
CB
712 int ret;
713 char *argv[] = {
714 "empty",
715 NULL,
716 };
717
811ef482 718 netdev->ifindex = 0;
14a7b0f9
CB
719 if (!netdev->upscript)
720 return 0;
721
722 ret = run_script_argv(handler->name, handler->conf->hooks_version,
723 "net", netdev->upscript, "up", argv);
724 if (ret < 0)
725 return -1;
726
811ef482
CB
727 return 0;
728}
729
730static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
731{
732 netdev->ifindex = 0;
733 return 0;
734}
735
736static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
737 [LXC_NET_VETH] = instantiate_veth,
738 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 739 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
740 [LXC_NET_VLAN] = instantiate_vlan,
741 [LXC_NET_PHYS] = instantiate_phys,
742 [LXC_NET_EMPTY] = instantiate_empty,
743 [LXC_NET_NONE] = instantiate_none,
744};
745
746static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
747{
14a7b0f9
CB
748 int ret;
749 char *argv[] = {
750 "veth",
751 netdev->link,
752 NULL,
753 NULL,
754 };
755
756 if (!netdev->downscript)
757 return 0;
811ef482 758
de4855a8 759 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 760 argv[2] = netdev->priv.veth_attr.pair;
811ef482 761 else
14a7b0f9
CB
762 argv[2] = netdev->priv.veth_attr.veth1;
763
764 ret = run_script_argv(handler->name,
765 handler->conf->hooks_version, "net",
766 netdev->downscript, "down", argv);
767 if (ret < 0)
768 return -1;
811ef482 769
811ef482
CB
770 return 0;
771}
772
773static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
774{
14a7b0f9
CB
775 int ret;
776 char *argv[] = {
777 "macvlan",
778 netdev->link,
779 NULL,
780 };
781
782 if (!netdev->downscript)
783 return 0;
784
785 ret = run_script_argv(handler->name, handler->conf->hooks_version,
786 "net", netdev->downscript, "down", argv);
787 if (ret < 0)
788 return -1;
811ef482 789
811ef482
CB
790 return 0;
791}
792
c9f52382 793static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
794{
795 int ret;
796 char *argv[] = {
797 "ipvlan",
798 netdev->link,
799 NULL,
800 };
801
802 if (!netdev->downscript)
803 return 0;
804
805 ret = run_script_argv(handler->name, handler->conf->hooks_version,
806 "net", netdev->downscript, "down", argv);
807 if (ret < 0)
808 return -1;
809
810 return 0;
811}
812
811ef482
CB
813static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
814{
3a73d9f1 815 int ret;
816 char *argv[] = {
817 "vlan",
818 netdev->link,
819 NULL,
820 };
821
822 if (!netdev->downscript)
823 return 0;
824
825 ret = run_script_argv(handler->name, handler->conf->hooks_version,
826 "net", netdev->downscript, "down", argv);
827 if (ret < 0)
828 return -1;
829
811ef482
CB
830 return 0;
831}
832
833static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
834{
14a7b0f9
CB
835 int ret;
836 char *argv[] = {
837 "phys",
838 netdev->link,
839 NULL,
840 };
841
842 if (!netdev->downscript)
843 return 0;
844
845 ret = run_script_argv(handler->name, handler->conf->hooks_version,
846 "net", netdev->downscript, "down", argv);
847 if (ret < 0)
848 return -1;
811ef482 849
811ef482
CB
850 return 0;
851}
852
853static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
854{
14a7b0f9
CB
855 int ret;
856 char *argv[] = {
857 "empty",
858 NULL,
859 };
860
861 if (!netdev->downscript)
862 return 0;
863
864 ret = run_script_argv(handler->name, handler->conf->hooks_version,
865 "net", netdev->downscript, "down", argv);
866 if (ret < 0)
867 return -1;
811ef482 868
811ef482
CB
869 return 0;
870}
871
/* "none" network type: nothing to tear down. Both arguments are unused and
 * the function always returns 0.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
876
877static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
878 [LXC_NET_VETH] = shutdown_veth,
879 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 880 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
881 [LXC_NET_VLAN] = shutdown_vlan,
882 [LXC_NET_PHYS] = shutdown_phys,
883 [LXC_NET_EMPTY] = shutdown_empty,
884 [LXC_NET_NONE] = shutdown_none,
885};
886
0037ab49
TP
887static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
888{
889 int err;
890 struct nl_handler nlh;
891 struct ifinfomsg *ifi;
892 struct nlmsg *nlmsg = NULL;
893
894 err = netlink_open(&nlh, NETLINK_ROUTE);
895 if (err)
896 return err;
897
898 err = -ENOMEM;
899 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
900 if (!nlmsg)
901 goto out;
902
903 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
904 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
905
906 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
907 if (!ifi)
908 goto out;
909 ifi->ifi_family = AF_UNSPEC;
910 ifi->ifi_index = ifindex;
911
912 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
913 goto out;
914
915 if (ifname != NULL) {
916 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
917 goto out;
918 }
919
920 err = netlink_transaction(&nlh, nlmsg, nlmsg);
921out:
922 netlink_close(&nlh);
923 nlmsg_free(nlmsg);
924 return err;
925}
926
ebc73a67 927int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 928{
ebc73a67 929 int err;
0ad19a3f 930 struct nl_handler nlh;
06f976ca 931 struct ifinfomsg *ifi;
ebc73a67 932 struct nlmsg *nlmsg = NULL;
0ad19a3f 933
3cfc0f3a
MN
934 err = netlink_open(&nlh, NETLINK_ROUTE);
935 if (err)
936 return err;
0ad19a3f 937
3cfc0f3a 938 err = -ENOMEM;
0ad19a3f 939 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
940 if (!nlmsg)
941 goto out;
942
ebc73a67 943 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
944 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
945
946 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
947 if (!ifi)
948 goto out;
06f976ca
SZ
949 ifi->ifi_family = AF_UNSPEC;
950 ifi->ifi_index = ifindex;
0ad19a3f 951
952 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
953 goto out;
954
8d357196
DY
955 if (ifname != NULL) {
956 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
957 goto out;
958 }
959
3cfc0f3a 960 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 961out:
962 netlink_close(&nlh);
963 nlmsg_free(nlmsg);
964 return err;
965}
966
ebc73a67
CB
967/* If we are asked to move a wireless interface, then we must actually move its
968 * phyN device. Detect that condition and return the physname here. The physname
969 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
970 */
971#define PHYSNAME "/sys/class/net/%s/phy80211/name"
ebc73a67 972static char *is_wlan(const char *ifname)
e5848d39 973{
b0293710 974 __do_free char *path = NULL;
ebc73a67 975 int i, ret;
e5848d39 976 long physlen;
ebc73a67 977 size_t len;
e5848d39 978 FILE *f;
ebc73a67 979 char *physname = NULL;
e5848d39 980
ebc73a67 981 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 982 path = must_realloc(NULL, len + 1);
e5848d39 983 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 984 if (ret < 0 || (size_t)ret >= len)
e5848d39 985 goto bad;
ebc73a67 986
ebc73a67
CB
987 f = fopen(path, "r");
988 if (!f)
e5848d39 989 goto bad;
ebc73a67 990
1a0e70ac 991 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
992 fseek(f, 0, SEEK_END);
993 physlen = ftell(f);
994 fseek(f, 0, SEEK_SET);
7d1cde93
SX
995 if (physlen < 0) {
996 fclose(f);
0382c0da 997 goto bad;
7d1cde93 998 }
ebc73a67
CB
999
1000 physname = malloc(physlen + 1);
ee54ea9a 1001 if (!physname) {
acf47e1b 1002 fclose(f);
e5848d39 1003 goto bad;
ee54ea9a 1004 }
ebc73a67
CB
1005
1006 memset(physname, 0, physlen + 1);
e5848d39
SH
1007 ret = fread(physname, 1, physlen, f);
1008 fclose(f);
1009 if (ret < 0)
1010 goto bad;
1011
ebc73a67 1012 for (i = 0; i < physlen; i++) {
e5848d39
SH
1013 if (physname[i] == '\n')
1014 physname[i] = '\0';
ebc73a67 1015
e5848d39
SH
1016 if (physname[i] == '\0')
1017 break;
1018 }
1019
1020 return physname;
1021
1022bad:
f10fad2f 1023 free(physname);
e5848d39
SH
1024 return NULL;
1025}
1026
ebc73a67
CB
/* Rename the network device @old to @new from inside the network namespace
 * of the process @pid.
 *
 * The work is done in a forked child that switches namespace with
 * switch_to_ns() and then calls lxc_netdev_rename_by_name(). The parent
 * waits for the child.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() for the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child. It must never return into the caller's code: returning here
	 * would leave a second copy of the calling process running.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1044
ebc73a67
CB
/* Move the wireless phy @physname to the process @pid by running
 * "iw phy <physname> set netns <pid>" in a child process. When @newname is
 * set, @ifname is afterwards renamed to @newname from inside the network
 * namespace of @pid.
 *
 * Takes ownership of @physname and frees it on every path.
 *
 * Returns 0 on success (or the result of the rename when @newname is set)
 * and -1 when iw is not found, fork() fails or the child does not succeed.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	pid_t child;
	char *iw_path;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto out_free;
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out_free;

	if (child == 0) {
		char pidstr[30];

		snprintf(pidstr, sizeof(pidstr), "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when exec failed. */
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto out_free;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out_free:
	free(physname);
	return ret;
}
1084
/* Move the device @ifname to the process @pid, optionally renaming it to
 * @newname.
 *
 * When is_wlan() returns a phy name for @ifname the move is delegated to
 * lxc_netdev_move_wlan(); otherwise lxc_netdev_move_by_index() is used.
 *
 * Returns -EINVAL when @ifname is NULL or has no ifindex, otherwise the
 * result of the function the move was delegated to.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (!ifname)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (!ifindex)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() frees the phy name. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1103
b84f58b9 1104int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1105{
b84f58b9 1106 int err;
ebc73a67
CB
1107 struct ifinfomsg *ifi;
1108 struct nl_handler nlh;
1109 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1110
3cfc0f3a
MN
1111 err = netlink_open(&nlh, NETLINK_ROUTE);
1112 if (err)
1113 return err;
0ad19a3f 1114
3cfc0f3a 1115 err = -ENOMEM;
0ad19a3f 1116 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1117 if (!nlmsg)
1118 goto out;
1119
06f976ca 1120 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1121 if (!answer)
1122 goto out;
1123
ebc73a67 1124 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1125 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1126
1127 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1128 if (!ifi)
1129 goto out;
06f976ca
SZ
1130 ifi->ifi_family = AF_UNSPEC;
1131 ifi->ifi_index = ifindex;
0ad19a3f 1132
3cfc0f3a 1133 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1134out:
1135 netlink_close(&nlh);
1136 nlmsg_free(answer);
1137 nlmsg_free(nlmsg);
1138 return err;
1139}
1140
b84f58b9
DL
/* Delete the device called @name.
 *
 * Returns -EINVAL when @name has no ifindex, otherwise the result of
 * lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1151
1152int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1153{
ebc73a67 1154 int err, len;
06f976ca 1155 struct ifinfomsg *ifi;
ebc73a67
CB
1156 struct nl_handler nlh;
1157 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1158
3cfc0f3a
MN
1159 err = netlink_open(&nlh, NETLINK_ROUTE);
1160 if (err)
1161 return err;
b9a5bb58 1162
b84f58b9 1163 len = strlen(newname);
90d79629
CB
1164 if (len == 1 || len >= IFNAMSIZ) {
1165 err = -EINVAL;
b84f58b9 1166 goto out;
90d79629 1167 }
b84f58b9 1168
3cfc0f3a 1169 err = -ENOMEM;
b9a5bb58
DL
1170 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1171 if (!nlmsg)
1172 goto out;
1173
06f976ca 1174 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1175 if (!answer)
1176 goto out;
1177
ebc73a67 1178 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1179 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1180
1181 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1182 if (!ifi)
1183 goto out;
06f976ca
SZ
1184 ifi->ifi_family = AF_UNSPEC;
1185 ifi->ifi_index = ifindex;
b84f58b9
DL
1186
1187 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1188 goto out;
b9a5bb58 1189
3cfc0f3a 1190 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1191out:
1192 netlink_close(&nlh);
1193 nlmsg_free(answer);
1194 nlmsg_free(nlmsg);
1195 return err;
1196}
1197
b84f58b9
DL
1198int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1199{
1200 int len, index;
1201
1202 len = strlen(oldname);
dae3fdf6 1203 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1204 return -EINVAL;
1205
1206 index = if_nametoindex(oldname);
1207 if (!index)
1208 return -EINVAL;
1209
1210 return lxc_netdev_rename_by_index(index, newname);
1211}
1212
8befa924 1213int netdev_set_flag(const char *name, int flag)
0ad19a3f 1214{
ebc73a67 1215 int err, index, len;
06f976ca 1216 struct ifinfomsg *ifi;
ebc73a67
CB
1217 struct nl_handler nlh;
1218 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1219
3cfc0f3a
MN
1220 err = netlink_open(&nlh, NETLINK_ROUTE);
1221 if (err)
1222 return err;
0ad19a3f 1223
3cfc0f3a 1224 err = -EINVAL;
0ad19a3f 1225 len = strlen(name);
dae3fdf6 1226 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1227 goto out;
1228
3cfc0f3a 1229 err = -ENOMEM;
0ad19a3f 1230 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1231 if (!nlmsg)
1232 goto out;
1233
06f976ca 1234 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1235 if (!answer)
1236 goto out;
1237
3cfc0f3a 1238 err = -EINVAL;
0ad19a3f 1239 index = if_nametoindex(name);
1240 if (!index)
1241 goto out;
1242
ebc73a67 1243 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1244 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1245
1246 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1247 if (!ifi) {
1248 err = -ENOMEM;
1249 goto out;
1250 }
06f976ca
SZ
1251 ifi->ifi_family = AF_UNSPEC;
1252 ifi->ifi_index = index;
1253 ifi->ifi_change |= IFF_UP;
1254 ifi->ifi_flags |= flag;
0ad19a3f 1255
1256 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1257out:
1258 netlink_close(&nlh);
1259 nlmsg_free(nlmsg);
1260 nlmsg_free(answer);
1261 return err;
1262}
1263
ebc73a67 1264int netdev_get_flag(const char *name, int *flag)
efa1cf45 1265{
ebc73a67 1266 int err, index, len;
a4318300 1267 struct ifinfomsg *ifi;
ebc73a67
CB
1268 struct nl_handler nlh;
1269 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1270
1271 if (!name)
1272 return -EINVAL;
1273
1274 err = netlink_open(&nlh, NETLINK_ROUTE);
1275 if (err)
1276 return err;
1277
1278 err = -EINVAL;
1279 len = strlen(name);
1280 if (len == 1 || len >= IFNAMSIZ)
1281 goto out;
1282
1283 err = -ENOMEM;
1284 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1285 if (!nlmsg)
1286 goto out;
1287
06f976ca 1288 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1289 if (!answer)
1290 goto out;
1291
1292 err = -EINVAL;
1293 index = if_nametoindex(name);
1294 if (!index)
1295 goto out;
1296
06f976ca
SZ
1297 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1298 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1299
1300 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1301 if (!ifi) {
1302 err = -ENOMEM;
1303 goto out;
1304 }
06f976ca
SZ
1305 ifi->ifi_family = AF_UNSPEC;
1306 ifi->ifi_index = index;
efa1cf45
DY
1307
1308 err = netlink_transaction(&nlh, nlmsg, answer);
1309 if (err)
1310 goto out;
1311
06f976ca 1312 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1313
1314 *flag = ifi->ifi_flags;
1315out:
1316 netlink_close(&nlh);
1317 nlmsg_free(nlmsg);
1318 nlmsg_free(answer);
1319 return err;
1320}
1321
1322/*
1323 * \brief Check a interface is up or not.
1324 *
1325 * \param name: name for the interface.
1326 *
1327 * \return int.
1328 * 0 means interface is down.
1329 * 1 means interface is up.
1330 * Others means error happened, and ret-value is the error number.
1331 */
ebc73a67 1332int lxc_netdev_isup(const char *name)
efa1cf45 1333{
ebc73a67 1334 int err, flag;
efa1cf45
DY
1335
1336 err = netdev_get_flag(name, &flag);
1337 if (err)
ebc73a67
CB
1338 return err;
1339
efa1cf45
DY
1340 if (flag & IFF_UP)
1341 return 1;
ebc73a67 1342
efa1cf45 1343 return 0;
efa1cf45
DY
1344}
1345
0130df54
SH
1346int netdev_get_mtu(int ifindex)
1347{
ebc73a67 1348 int answer_len, err, res;
0130df54 1349 struct nl_handler nlh;
06f976ca 1350 struct ifinfomsg *ifi;
0130df54 1351 struct nlmsghdr *msg;
ebc73a67
CB
1352 int readmore = 0, recv_len = 0;
1353 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1354
1355 err = netlink_open(&nlh, NETLINK_ROUTE);
1356 if (err)
1357 return err;
1358
1359 err = -ENOMEM;
1360 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1361 if (!nlmsg)
1362 goto out;
1363
06f976ca 1364 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1365 if (!answer)
1366 goto out;
1367
1368 /* Save the answer buffer length, since it will be overwritten
1369 * on the first receive (and we might need to receive more than
ebc73a67
CB
1370 * once.
1371 */
06f976ca
SZ
1372 answer_len = answer->nlmsghdr->nlmsg_len;
1373
ebc73a67 1374 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1375 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1376
06f976ca 1377 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1378 if (!ifi)
1379 goto out;
06f976ca 1380 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1381
1382 /* Send the request for addresses, which returns all addresses
1383 * on all interfaces. */
1384 err = netlink_send(&nlh, nlmsg);
1385 if (err < 0)
1386 goto out;
1387
6ce39620
CB
1388#pragma GCC diagnostic push
1389#pragma GCC diagnostic ignored "-Wcast-align"
1390
0130df54
SH
1391 do {
1392 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1393 * overwritten by a previous receive.
1394 */
06f976ca 1395 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1396
1397 /* Get the (next) batch of reply messages */
1398 err = netlink_rcv(&nlh, answer);
1399 if (err < 0)
1400 goto out;
1401
1402 recv_len = err;
0130df54
SH
1403
1404 /* Satisfy the typing for the netlink macros */
06f976ca 1405 msg = answer->nlmsghdr;
0130df54
SH
1406
1407 while (NLMSG_OK(msg, recv_len)) {
1408
1409 /* Stop reading if we see an error message */
1410 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1411 struct nlmsgerr *errmsg =
1412 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1413 err = errmsg->error;
1414 goto out;
1415 }
1416
1417 /* Stop reading if we see a NLMSG_DONE message */
1418 if (msg->nlmsg_type == NLMSG_DONE) {
1419 readmore = 0;
1420 break;
1421 }
1422
06f976ca 1423 ifi = NLMSG_DATA(msg);
0130df54
SH
1424 if (ifi->ifi_index == ifindex) {
1425 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1426 int attr_len =
1427 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1428 res = 0;
ebc73a67
CB
1429 while (RTA_OK(rta, attr_len)) {
1430 /* Found a local address for the
1431 * requested interface, return it.
1432 */
0130df54 1433 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1434 memcpy(&res, RTA_DATA(rta),
1435 sizeof(int));
0130df54
SH
1436 err = res;
1437 goto out;
1438 }
1439 rta = RTA_NEXT(rta, attr_len);
1440 }
0130df54
SH
1441 }
1442
ebc73a67
CB
1443 /* Keep reading more data from the socket if the last
1444 * message had the NLF_F_MULTI flag set.
1445 */
0130df54
SH
1446 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1447
ebc73a67 1448 /* Look at the next message received in this buffer. */
0130df54
SH
1449 msg = NLMSG_NEXT(msg, recv_len);
1450 }
1451 } while (readmore);
1452
6ce39620
CB
1453#pragma GCC diagnostic pop
1454
ebc73a67 1455 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1456 err = -1;
1457
1458out:
1459 netlink_close(&nlh);
1460 nlmsg_free(answer);
1461 nlmsg_free(nlmsg);
1462 return err;
1463}
1464
d472214b 1465int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1466{
ebc73a67 1467 int err, index, len;
06f976ca 1468 struct ifinfomsg *ifi;
ebc73a67
CB
1469 struct nl_handler nlh;
1470 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1471
3cfc0f3a
MN
1472 err = netlink_open(&nlh, NETLINK_ROUTE);
1473 if (err)
1474 return err;
75d09f83 1475
3cfc0f3a 1476 err = -EINVAL;
75d09f83 1477 len = strlen(name);
dae3fdf6 1478 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1479 goto out;
1480
3cfc0f3a 1481 err = -ENOMEM;
75d09f83
DL
1482 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1483 if (!nlmsg)
1484 goto out;
1485
06f976ca 1486 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1487 if (!answer)
1488 goto out;
1489
3cfc0f3a 1490 err = -EINVAL;
75d09f83
DL
1491 index = if_nametoindex(name);
1492 if (!index)
1493 goto out;
1494
ebc73a67 1495 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1496 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1497
1498 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1499 if (!ifi) {
1500 err = -ENOMEM;
1501 goto out;
1502 }
06f976ca
SZ
1503 ifi->ifi_family = AF_UNSPEC;
1504 ifi->ifi_index = index;
75d09f83
DL
1505
1506 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1507 goto out;
1508
1509 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1510out:
1511 netlink_close(&nlh);
1512 nlmsg_free(nlmsg);
1513 nlmsg_free(answer);
1514 return err;
1515}
1516
d472214b 1517int lxc_netdev_up(const char *name)
0ad19a3f 1518{
d472214b 1519 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1520}
1521
/* Calls netdev_set_flag() with no flag bits for the interface called @name
 * and returns its result.
 */
int lxc_netdev_down(const char *name)
{
	int ret = netdev_set_flag(name, 0);

	return ret;
}
1526
497353b6 1527int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1528{
ebc73a67 1529 int err, len;
06f976ca 1530 struct ifinfomsg *ifi;
ebc73a67 1531 struct nl_handler nlh;
0ad19a3f 1532 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1533 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1534
3cfc0f3a
MN
1535 err = netlink_open(&nlh, NETLINK_ROUTE);
1536 if (err)
1537 return err;
0ad19a3f 1538
3cfc0f3a 1539 err = -EINVAL;
0ad19a3f 1540 len = strlen(name1);
dae3fdf6 1541 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1542 goto out;
1543
1544 len = strlen(name2);
dae3fdf6 1545 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1546 goto out;
1547
3cfc0f3a 1548 err = -ENOMEM;
0ad19a3f 1549 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1550 if (!nlmsg)
1551 goto out;
1552
06f976ca 1553 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1554 if (!answer)
1555 goto out;
1556
06f976ca 1557 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1558 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1559 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1560
1561 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1562 if (!ifi)
1563 goto out;
06f976ca 1564 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1565
3cfc0f3a 1566 err = -EINVAL;
79e68309 1567 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1568 if (!nest1)
1569 goto out;
1570
1571 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1572 goto out;
1573
1574 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1575 if (!nest2)
1576 goto out;
1577
1578 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1579 if (!nest3)
1580 goto out;
1581
06f976ca 1582 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1583 if (!ifi) {
1584 err = -ENOMEM;
06f976ca 1585 goto out;
25a9939b 1586 }
0ad19a3f 1587
1588 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1589 goto out;
1590
1591 nla_end_nested(nlmsg, nest3);
0ad19a3f 1592 nla_end_nested(nlmsg, nest2);
0ad19a3f 1593 nla_end_nested(nlmsg, nest1);
1594
1595 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1596 goto out;
1597
3cfc0f3a 1598 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1599out:
1600 netlink_close(&nlh);
1601 nlmsg_free(answer);
1602 nlmsg_free(nlmsg);
1603 return err;
1604}
1605
ebc73a67 1606/* TODO: merge with lxc_macvlan_create */
7c11d57a 1607int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1608{
ebc73a67 1609 int err, len, lindex;
06f976ca 1610 struct ifinfomsg *ifi;
ebc73a67 1611 struct nl_handler nlh;
26c39028 1612 struct rtattr *nest, *nest2;
ebc73a67 1613 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1614
3cfc0f3a
MN
1615 err = netlink_open(&nlh, NETLINK_ROUTE);
1616 if (err)
1617 return err;
26c39028 1618
3cfc0f3a 1619 err = -EINVAL;
26c39028 1620 len = strlen(master);
dae3fdf6 1621 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1622 goto err3;
1623
1624 len = strlen(name);
dae3fdf6 1625 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1626 goto err3;
1627
3cfc0f3a 1628 err = -ENOMEM;
26c39028
JHS
1629 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1630 if (!nlmsg)
1631 goto err3;
1632
06f976ca 1633 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1634 if (!answer)
1635 goto err2;
1636
3cfc0f3a 1637 err = -EINVAL;
26c39028
JHS
1638 lindex = if_nametoindex(master);
1639 if (!lindex)
1640 goto err1;
1641
06f976ca 1642 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1643 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1644 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1645
1646 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1647 if (!ifi) {
1648 err = -ENOMEM;
1649 goto err1;
1650 }
06f976ca 1651 ifi->ifi_family = AF_UNSPEC;
26c39028 1652
79e68309 1653 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1654 if (!nest)
1655 goto err1;
1656
1657 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1658 goto err1;
1659
1660 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1661 if (!nest2)
1662 goto err1;
e892973e 1663
26c39028
JHS
1664 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1665 goto err1;
e892973e 1666
26c39028 1667 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1668 nla_end_nested(nlmsg, nest);
1669
1670 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1671 goto err1;
1672
1673 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1674 goto err1;
1675
3cfc0f3a 1676 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1677err1:
1678 nlmsg_free(answer);
1679err2:
1680 nlmsg_free(nlmsg);
1681err3:
1682 netlink_close(&nlh);
1683 return err;
1684}
1685
e892973e 1686int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1687{
ebc73a67 1688 int err, index, len;
06f976ca 1689 struct ifinfomsg *ifi;
ebc73a67 1690 struct nl_handler nlh;
e892973e 1691 struct rtattr *nest, *nest2;
ebc73a67 1692 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1693
3cfc0f3a
MN
1694 err = netlink_open(&nlh, NETLINK_ROUTE);
1695 if (err)
1696 return err;
0ad19a3f 1697
3cfc0f3a 1698 err = -EINVAL;
0ad19a3f 1699 len = strlen(master);
dae3fdf6 1700 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1701 goto out;
1702
1703 len = strlen(name);
dae3fdf6 1704 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1705 goto out;
1706
3cfc0f3a 1707 err = -ENOMEM;
0ad19a3f 1708 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1709 if (!nlmsg)
1710 goto out;
1711
06f976ca 1712 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1713 if (!answer)
1714 goto out;
1715
3cfc0f3a 1716 err = -EINVAL;
0ad19a3f 1717 index = if_nametoindex(master);
1718 if (!index)
1719 goto out;
1720
06f976ca 1721 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1722 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1723 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1724
1725 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1726 if (!ifi) {
1727 err = -ENOMEM;
1728 goto out;
1729 }
06f976ca 1730 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1731
79e68309 1732 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1733 if (!nest)
1734 goto out;
1735
1736 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1737 goto out;
1738
e892973e
DL
1739 if (mode) {
1740 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1741 if (!nest2)
1742 goto out;
1743
1744 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1745 goto out;
1746
1747 nla_end_nested(nlmsg, nest2);
1748 }
1749
0ad19a3f 1750 nla_end_nested(nlmsg, nest);
1751
1752 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1753 goto out;
1754
1755 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1756 goto out;
1757
3cfc0f3a 1758 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1759out:
1760 netlink_close(&nlh);
1761 nlmsg_free(answer);
1762 nlmsg_free(nlmsg);
1763 return err;
1764}
1765
1766static int proc_sys_net_write(const char *path, const char *value)
1767{
ebc73a67
CB
1768 int fd;
1769 int err = 0;
0ad19a3f 1770
1771 fd = open(path, O_WRONLY);
1772 if (fd < 0)
1773 return -errno;
1774
f640cf46 1775 if (lxc_write_nointr(fd, value, strlen(value)) < 0)
0ad19a3f 1776 err = -errno;
1777
1778 close(fd);
1779 return err;
1780}
1781
6509154d 1782static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1783{
1784 int ret;
1785 char path[PATH_MAX];
1786 char buf[1] = "";
1787
1788 if (family != AF_INET && family != AF_INET6)
1789 return minus_one_set_errno(EINVAL);
1790
1791 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1792 family == AF_INET ? "ipv4" : "ipv6", ifname,
1793 "forwarding");
1794 if (ret < 0 || (size_t)ret >= PATH_MAX)
1795 return minus_one_set_errno(E2BIG);
1796
1797 return lxc_read_file_expect(path, buf, 1, "1");
1798}
1799
0ad19a3f 1800static int neigh_proxy_set(const char *ifname, int family, int flag)
1801{
9ba8130c 1802 int ret;
419590da 1803 char path[PATH_MAX];
0ad19a3f 1804
1805 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1806 return -EINVAL;
0ad19a3f 1807
419590da 1808 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1809 family == AF_INET ? "ipv4" : "ipv6", ifname,
1810 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 1811 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 1812 return -E2BIG;
0ad19a3f 1813
ebc73a67 1814 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1815}
1816
6509154d 1817static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1818{
1819 int ret;
1820 char path[PATH_MAX];
1821 char buf[1] = "";
1822
1823 if (family != AF_INET && family != AF_INET6)
1824 return minus_one_set_errno(EINVAL);
1825
1826 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1827 family == AF_INET ? "ipv4" : "ipv6", ifname,
1828 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1829 if (ret < 0 || (size_t)ret >= PATH_MAX)
1830 return minus_one_set_errno(E2BIG);
1831
1832 return lxc_read_file_expect(path, buf, 1, "1");
1833}
1834
/* Calls neigh_proxy_set() with flag 1 for interface @name and address
 * family @family, and returns its result.
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1839
/* Calls neigh_proxy_set() with flag 0 for interface @name and address
 * family @family, and returns its result.
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
1844
/* Value of the hexadecimal digit @c, or -1 if @c is not one.
 *
 * The character is converted to unsigned char before it is handed to
 * isdigit(): passing a negative plain char to the <ctype.h> functions is
 * undefined behaviour.
 */
static int lxc_mac_hex_value(char c)
{
	unsigned char uc = (unsigned char)c;

	if (isdigit(uc))
		return uc - '0';

	if (uc >= 'a' && uc <= 'f')
		return uc - 'a' + 10;

	if (uc >= 'A' && uc <= 'F')
		return uc - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are written to
 * sa_data. Each byte is written as one or two hexadecimal digits (upper or
 * lower case); bytes may be separated by ':'. Parsing stops at the end of
 * the string or after ETH_ALEN bytes, whichever comes first.
 *
 * Returns 0 on success and -EINVAL when a character that is neither a
 * hexadecimal digit nor an accepted separator is met. On failure sa_family
 * and any bytes parsed so far have already been written.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		int hi, lo;
		unsigned val;
		char c;

		/* The first character of a byte must be a hex digit. */
		hi = lxc_mac_hex_value(*macaddr++);
		if (hi < 0)
			return -EINVAL;
		val = (unsigned)hi << 4;

		/* The second one is either another hex digit, or a separator
		 * / end of string, in which case the byte had a single digit.
		 */
		c = *macaddr;
		lo = lxc_mac_hex_value(c);
		if (lo >= 0)
			val |= (unsigned)lo;
		else if (c == ':' || c == '\0')
			val >>= 4;
		else
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator that follows a two-digit byte. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1889
ebc73a67
CB
1890static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1891 void *acast, int prefix)
0ad19a3f 1892{
ebc73a67 1893 int addrlen, err;
06f976ca 1894 struct ifaddrmsg *ifa;
ebc73a67
CB
1895 struct nl_handler nlh;
1896 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1897
ebc73a67
CB
1898 addrlen = family == AF_INET ? sizeof(struct in_addr)
1899 : sizeof(struct in6_addr);
4bf1968d 1900
3cfc0f3a
MN
1901 err = netlink_open(&nlh, NETLINK_ROUTE);
1902 if (err)
1903 return err;
0ad19a3f 1904
3cfc0f3a 1905 err = -ENOMEM;
0ad19a3f 1906 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1907 if (!nlmsg)
1908 goto out;
1909
06f976ca 1910 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1911 if (!answer)
1912 goto out;
1913
06f976ca 1914 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1915 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1916 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1917
1918 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1919 if (!ifa)
25a9939b 1920 goto out;
06f976ca
SZ
1921 ifa->ifa_prefixlen = prefix;
1922 ifa->ifa_index = ifindex;
1923 ifa->ifa_family = family;
1924 ifa->ifa_scope = 0;
acf47e1b 1925
3cfc0f3a 1926 err = -EINVAL;
4bf1968d 1927 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1928 goto out;
1929
4bf1968d 1930 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1931 goto out;
1932
d8948a52 1933 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1934 goto out;
1935
ebc73a67 1936 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1937 err = -EPROTONOSUPPORT;
79881dc6
DL
1938 if (family == AF_INET6 &&
1939 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1940 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1941 goto out;
0ad19a3f 1942
3cfc0f3a 1943 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1944out:
1945 netlink_close(&nlh);
1946 nlmsg_free(answer);
1947 nlmsg_free(nlmsg);
1948 return err;
1949}
1950
/* IPv6 front end for ip_addr_add(): @mcast is passed in the broadcast slot
 * and @acast in the anycast slot. Returns the ip_addr_add() result.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	int ret = ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);

	return ret;
}
1957
ebc73a67
CB
/* IPv4 front end for ip_addr_add(); no anycast address is passed (NULL).
 * Returns the ip_addr_add() result.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	int ret = ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);

	return ret;
}
1963
ebc73a67
CB
/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address in
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 */
6ce39620
CB
1968#pragma GCC diagnostic push
1969#pragma GCC diagnostic ignored "-Wcast-align"
1970
ebc73a67
CB
/* Extract an address of @family from the address message @msg into *@res.
 *
 * Messages of another family are ignored (returns 0, *@res untouched).
 * The attributes are walked in order; each IFA_LOCAL or IFA_ADDRESS payload
 * is copied into *@res, which is malloc()ed on first use when it is NULL.
 * The walk stops at the first IFA_LOCAL, so an IFA_LOCAL overrides an
 * IFA_ADDRESS seen before it.
 *
 * Returns 0 on success (whether or not an address was found) and -1 when an
 * attribute has an unexpected payload size or the allocation fails.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	int addrlen, attr_len;
	struct rtattr *rta;
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));

	for (rta = IFA_RTA(ifa); RTA_OK(rta, attr_len);
	     rta = RTA_NEXT(rta, attr_len)) {
		if (rta->rta_type != IFA_LOCAL && rta->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(rta) != addrlen)
			return -1;

		/* Reuse the buffer if an earlier attribute allocated it. */
		if (*res == NULL) {
			*res = malloc(addrlen);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(rta), addrlen);

		if (rta->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2015
6ce39620
CB
2016#pragma GCC diagnostic pop
2017
19a26f82
MK
2018static int ip_addr_get(int family, int ifindex, void **res)
2019{
ebc73a67 2020 int answer_len, err;
06f976ca 2021 struct ifaddrmsg *ifa;
ebc73a67 2022 struct nl_handler nlh;
19a26f82 2023 struct nlmsghdr *msg;
ebc73a67
CB
2024 int readmore = 0, recv_len = 0;
2025 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2026
2027 err = netlink_open(&nlh, NETLINK_ROUTE);
2028 if (err)
2029 return err;
2030
2031 err = -ENOMEM;
2032 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2033 if (!nlmsg)
2034 goto out;
2035
06f976ca 2036 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2037 if (!answer)
2038 goto out;
2039
ebc73a67
CB
2040 /* Save the answer buffer length, since it will be overwritten on the
2041 * first receive (and we might need to receive more than once).
2042 */
06f976ca
SZ
2043 answer_len = answer->nlmsghdr->nlmsg_len;
2044
ebc73a67 2045 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2046 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2047
06f976ca 2048 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2049 if (!ifa)
2050 goto out;
06f976ca 2051 ifa->ifa_family = family;
19a26f82 2052
ebc73a67
CB
2053 /* Send the request for addresses, which returns all addresses on all
2054 * interfaces.
2055 */
19a26f82
MK
2056 err = netlink_send(&nlh, nlmsg);
2057 if (err < 0)
2058 goto out;
19a26f82 2059
6ce39620
CB
2060#pragma GCC diagnostic push
2061#pragma GCC diagnostic ignored "-Wcast-align"
2062
19a26f82
MK
2063 do {
2064 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2065 * overwritten by a previous receive.
2066 */
06f976ca 2067 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2068
ebc73a67 2069 /* Get the (next) batch of reply messages. */
19a26f82
MK
2070 err = netlink_rcv(&nlh, answer);
2071 if (err < 0)
2072 goto out;
2073
2074 recv_len = err;
2075 err = 0;
2076
ebc73a67 2077 /* Satisfy the typing for the netlink macros. */
06f976ca 2078 msg = answer->nlmsghdr;
19a26f82
MK
2079
2080 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2081 /* Stop reading if we see an error message. */
19a26f82 2082 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2083 struct nlmsgerr *errmsg =
2084 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2085 err = errmsg->error;
2086 goto out;
2087 }
2088
ebc73a67 2089 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2090 if (msg->nlmsg_type == NLMSG_DONE) {
2091 readmore = 0;
2092 break;
2093 }
2094
2095 if (msg->nlmsg_type != RTM_NEWADDR) {
2096 err = -1;
2097 goto out;
2098 }
2099
06f976ca
SZ
2100 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2101 if (ifa->ifa_index == ifindex) {
2102 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2103 err = -1;
2104 goto out;
2105 }
2106
ebc73a67 2107 /* Found a result, stop searching. */
19a26f82
MK
2108 if (*res)
2109 goto out;
2110 }
2111
ebc73a67
CB
2112 /* Keep reading more data from the socket if the last
2113 * message had the NLF_F_MULTI flag set.
2114 */
19a26f82
MK
2115 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2116
ebc73a67 2117 /* Look at the next message received in this buffer. */
19a26f82
MK
2118 msg = NLMSG_NEXT(msg, recv_len);
2119 }
2120 } while (readmore);
2121
6ce39620
CB
2122#pragma GCC diagnostic pop
2123
19a26f82 2124 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2125 * error.
2126 */
19a26f82
MK
2127 err = -1;
2128
2129out:
2130 netlink_close(&nlh);
2131 nlmsg_free(answer);
2132 nlmsg_free(nlmsg);
2133 return err;
2134}
2135
/* IPv6 front end for ip_addr_get(); the address is stored through @res.
 * Returns the ip_addr_get() result.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2140
/* IPv4 front end for ip_addr_get(); the address is stored through @res.
 * Returns the ip_addr_get() result.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2145
f8fee0e2
MK
2146static int ip_gateway_add(int family, int ifindex, void *gw)
2147{
ebc73a67 2148 int addrlen, err;
f8fee0e2 2149 struct nl_handler nlh;
06f976ca 2150 struct rtmsg *rt;
ebc73a67 2151 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2152
ebc73a67
CB
2153 addrlen = family == AF_INET ? sizeof(struct in_addr)
2154 : sizeof(struct in6_addr);
f8fee0e2
MK
2155
2156 err = netlink_open(&nlh, NETLINK_ROUTE);
2157 if (err)
2158 return err;
2159
2160 err = -ENOMEM;
2161 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2162 if (!nlmsg)
2163 goto out;
2164
06f976ca 2165 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2166 if (!answer)
2167 goto out;
2168
06f976ca 2169 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2170 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2171 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2172
2173 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2174 if (!rt)
2175 goto out;
06f976ca
SZ
2176 rt->rtm_family = family;
2177 rt->rtm_table = RT_TABLE_MAIN;
2178 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2179 rt->rtm_protocol = RTPROT_BOOT;
2180 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2181 /* "default" destination */
06f976ca 2182 rt->rtm_dst_len = 0;
f8fee0e2
MK
2183
2184 err = -EINVAL;
a2f9a670 2185
2186 /* If gateway address not supplied, then a device route will be created instead */
2187 if (gw != NULL) {
2188 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2189 goto out;
2190 }
f8fee0e2
MK
2191
2192 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2193 * addresses for the gateway.
2194 */
f8fee0e2
MK
2195 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2196 goto out;
2197
2198 err = netlink_transaction(&nlh, nlmsg, answer);
2199out:
2200 netlink_close(&nlh);
2201 nlmsg_free(answer);
2202 nlmsg_free(nlmsg);
2203 return err;
2204}
2205
/* IPv4 front end for ip_gateway_add(). Returns the ip_gateway_add() result. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	int ret = ip_gateway_add(AF_INET, ifindex, gw);

	return ret;
}
2210
/* IPv6 front end for ip_gateway_add(). Returns the ip_gateway_add() result. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	int ret = ip_gateway_add(AF_INET6, ifindex, gw);

	return ret;
}
/* Report whether @bridge is treated as an openvswitch bridge.
 *
 * Returns true exactly when stat() on /sys/class/net/<bridge>/bridge fails
 * with ENOENT. Returns false when that path exists, when stat() fails for
 * another reason, or when the path does not fit into the local buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	int len;
	struct stat st;
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char path[22 + IFNAMSIZ + 1] = {0};

	len = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	return stat(path, &st) < 0 && errno == ENOENT;
}
2232
581c75e7
CB
/* Arguments handed through run_command()'s void pointer to the ovs-vsctl
 * exec callbacks.
 */
struct ovs_veth_args {
	/* Passed to ovs-vsctl as the bridge argument. */
	const char *bridge;
	/* Passed to ovs-vsctl as the port argument. */
	const char *nic;
};
2237
cb0dc11b
CB
2238/* Called from a background thread - when nic goes away, remove it from the
2239 * bridge.
c43cbc04 2240 */
581c75e7 2241static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2242{
581c75e7 2243 struct ovs_veth_args *args = data;
cb0dc11b 2244
581c75e7
CB
2245 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2246 (char *)NULL);
2247 return -1;
c43cbc04
SH
2248}
2249
581c75e7 2250int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2251{
c43cbc04 2252 int ret;
419590da 2253 char cmd_output[PATH_MAX];
581c75e7 2254 struct ovs_veth_args args;
6ad22d06 2255
581c75e7
CB
2256 args.bridge = bridge;
2257 args.nic = nic;
2258 ret = run_command(cmd_output, sizeof(cmd_output),
2259 lxc_ovs_delete_port_exec, (void *)&args);
2260 if (ret < 0) {
2261 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2262 "%s", bridge, nic, cmd_output);
6ad22d06 2263 return -1;
581c75e7 2264 }
0d204771 2265
581c75e7
CB
2266 return 0;
2267}
ebc73a67 2268
581c75e7
CB
2269static int lxc_ovs_attach_bridge_exec(void *data)
2270{
2271 struct ovs_veth_args *args = data;
ebc73a67 2272
581c75e7
CB
2273 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2274 (char *)NULL);
2275 return -1;
2276}
ebc73a67 2277
581c75e7
CB
2278static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2279{
2280 int ret;
419590da 2281 char cmd_output[PATH_MAX];
581c75e7 2282 struct ovs_veth_args args;
ebc73a67 2283
581c75e7
CB
2284 args.bridge = bridge;
2285 args.nic = nic;
2286 ret = run_command(cmd_output, sizeof(cmd_output),
2287 lxc_ovs_attach_bridge_exec, (void *)&args);
2288 if (ret < 0) {
2289 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2290 bridge, nic, cmd_output);
2291 return -1;
c43cbc04 2292 }
0d204771 2293
581c75e7 2294 return 0;
0d204771 2295}
0d204771 2296
581c75e7 2297int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2298{
ebc73a67 2299 int err, fd, index;
9de31d5a 2300 size_t retlen;
0ad19a3f 2301 struct ifreq ifr;
2302
dae3fdf6 2303 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2304 return -EINVAL;
0ad19a3f 2305
2306 index = if_nametoindex(ifname);
2307 if (!index)
3cfc0f3a 2308 return -EINVAL;
0ad19a3f 2309
0d204771 2310 if (is_ovs_bridge(bridge))
581c75e7 2311 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2312
ad9429e5 2313 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2314 if (fd < 0)
3cfc0f3a 2315 return -errno;
0ad19a3f 2316
9de31d5a 2317 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2318 if (retlen >= IFNAMSIZ) {
2319 close(fd);
9de31d5a 2320 return -E2BIG;
42cc4083 2321 }
9de31d5a 2322
ebc73a67 2323 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2324 ifr.ifr_ifindex = index;
7d163508 2325 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2326 close(fd);
3cfc0f3a
MN
2327 if (err)
2328 err = -errno;
0ad19a3f 2329
2330 return err;
2331}
72d0e1cb 2332
ebc73a67 2333static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2334 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2335 [LXC_NET_VETH] = "veth",
2336 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2337 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2338 [LXC_NET_PHYS] = "phys",
b343592b
BP
2339 [LXC_NET_VLAN] = "vlan",
2340 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2341};
2342
2343const char *lxc_net_type_to_str(int type)
2344{
2345 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2346 return NULL;
ebc73a67 2347
72d0e1cb
SG
2348 return lxc_network_types[type];
2349}
8befa924 2350
ebc73a67 2351static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2352
966e9f1f 2353char *lxc_mkifname(char *template)
a0265685 2354{
2d7bf744 2355 int ret;
b1e44ed1 2356 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2357 char name[IFNAMSIZ];
2358 bool exists = false;
2359 size_t i = 0;
280cc35f 2360#ifdef HAVE_RAND_R
2361 unsigned int seed;
2362
2363 seed = randseed(false);
2364#else
2365
2366 (void)randseed(true);
2367#endif
a0265685 2368
535e8859
CB
2369 if (strlen(template) >= IFNAMSIZ)
2370 return NULL;
2371
ebc73a67 2372 /* Get all the network interfaces. */
b1e44ed1 2373 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2374 if (ret < 0) {
6d1400b5 2375 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2376 return NULL;
2377 }
a0265685 2378
ebc73a67 2379 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2380 for (;;) {
966e9f1f 2381 name[0] = '\0';
94b1cade 2382 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2383
966e9f1f 2384 exists = false;
280cc35f 2385
a0265685
SG
2386 for (i = 0; i < strlen(name); i++) {
2387 if (name[i] == 'X') {
2388#ifdef HAVE_RAND_R
8523344a 2389 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2390#else
8523344a 2391 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2392#endif
2393 }
2394 }
2395
2396 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2397 if (!strcmp(ifa->ifa_name, name)) {
2398 exists = true;
a0265685
SG
2399 break;
2400 }
2401 }
2402
966e9f1f 2403 if (!exists)
a0265685 2404 break;
a0265685
SG
2405 }
2406
b1e44ed1 2407 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2408 (void)strlcpy(template, name, strlen(template) + 1);
2409
2410 return template;
a0265685
SG
2411}
2412
8befa924
SH
/* Rewrite the hardware address of interface @veth1 so that its first byte is
 * 0xfe: the current address is read with SIOCGIFHWADDR, the first byte is
 * replaced, and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG when @veth1 does not fit into an interface
 * name, or a negative errno value from socket()/ioctl().
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, sockfd, saved_errno;
	struct ifreq ifr;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ) {
		close(sockfd);
		return -E2BIG;
	}

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0) {
		/* close() may overwrite errno, so capture it first. */
		saved_errno = errno;
		close(sockfd);
		return -saved_errno;
	}

	ifr.ifr_hwaddr.sa_data[0] = 0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	saved_errno = errno;
	close(sockfd);
	if (err < 0)
		return -saved_errno;

	return 0;
}
811ef482
CB
2442
2443int lxc_find_gateway_addresses(struct lxc_handler *handler)
2444{
2445 struct lxc_list *network = &handler->conf->network;
2446 struct lxc_list *iterator;
2447 struct lxc_netdev *netdev;
2448 int link_index;
2449
2450 lxc_list_for_each(iterator, network) {
2451 netdev = iterator->elem;
2452
2453 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2454 continue;
2455
2456 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2457 ERROR("Automatic gateway detection is only supported "
2458 "for veth and macvlan");
2459 return -1;
2460 }
2461
de4855a8 2462 if (netdev->link[0] == '\0') {
811ef482
CB
2463 ERROR("Automatic gateway detection needs a link interface");
2464 return -1;
2465 }
2466
2467 link_index = if_nametoindex(netdev->link);
2468 if (!link_index)
2469 return -EINVAL;
2470
2471 if (netdev->ipv4_gateway_auto) {
2472 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2473 ERROR("Failed to automatically find ipv4 gateway "
2474 "address from link interface \"%s\"", netdev->link);
2475 return -1;
2476 }
2477 }
2478
2479 if (netdev->ipv6_gateway_auto) {
2480 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2481 ERROR("Failed to automatically find ipv6 gateway "
2482 "address from link interface \"%s\"", netdev->link);
2483 return -1;
2484 }
2485 }
2486 }
2487
2488 return 0;
2489}
2490
2491#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2492static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2493 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2494{
2495 int ret;
2496 pid_t child;
2497 int bytes, pipefd[2];
2498 char *token, *saveptr = NULL;
095ead80 2499 char netdev_link[IFNAMSIZ];
419590da 2500 char buffer[PATH_MAX] = {0};
94b1cade 2501 size_t retlen;
811ef482
CB
2502
2503 if (netdev->type != LXC_NET_VETH) {
2504 ERROR("Network type %d not support for unprivileged use", netdev->type);
2505 return -1;
2506 }
2507
2508 ret = pipe(pipefd);
2509 if (ret < 0) {
2510 SYSERROR("Failed to create pipe");
2511 return -1;
2512 }
2513
2514 child = fork();
2515 if (child < 0) {
2516 SYSERROR("Failed to create new process");
2517 close(pipefd[0]);
2518 close(pipefd[1]);
2519 return -1;
2520 }
2521
2522 if (child == 0) {
8335fd40 2523 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2524
2525 close(pipefd[0]);
2526
2527 ret = dup2(pipefd[1], STDOUT_FILENO);
2528 if (ret >= 0)
2529 ret = dup2(pipefd[1], STDERR_FILENO);
2530 close(pipefd[1]);
2531 if (ret < 0) {
2532 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2533 _exit(EXIT_FAILURE);
811ef482
CB
2534 }
2535
de4855a8 2536 if (netdev->link[0] != '\0')
9de31d5a 2537 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2538 else
9de31d5a
CB
2539 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2540 if (retlen >= IFNAMSIZ) {
2541 SYSERROR("Invalid network device name");
2542 _exit(EXIT_FAILURE);
2543 }
811ef482 2544
8335fd40
CB
2545 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2546 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2547 _exit(EXIT_FAILURE);
8335fd40 2548 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2549
2550 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2551 lxcname, pidstr, netdev_link,
de4855a8
CB
2552 netdev->name[0] != '\0' ? netdev->name : "(null)");
2553 if (netdev->name[0] != '\0')
811ef482
CB
2554 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2555 lxcpath, lxcname, pidstr, "veth", netdev_link,
2556 netdev->name, (char *)NULL);
2557 else
2558 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2559 lxcpath, lxcname, pidstr, "veth", netdev_link,
2560 (char *)NULL);
2561 SYSERROR("Failed to execute lxc-user-nic");
78070056 2562 _exit(EXIT_FAILURE);
811ef482
CB
2563 }
2564
2565 /* close the write-end of the pipe */
2566 close(pipefd[1]);
2567
419590da 2568 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2569 if (bytes < 0) {
74c6e2b0 2570 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2571 close(pipefd[0]);
6b9f82a9
CB
2572 } else {
2573 buffer[bytes - 1] = '\0';
811ef482 2574 }
811ef482
CB
2575
2576 ret = wait_for_pid(child);
2577 close(pipefd[0]);
6b9f82a9 2578 if (ret != 0 || bytes < 0) {
811ef482
CB
2579 ERROR("lxc-user-nic failed to configure requested network: %s",
2580 buffer[0] != '\0' ? buffer : "(null)");
2581 return -1;
2582 }
2583 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2584
2585 /* netdev->name */
2586 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2587 if (!token) {
2588 ERROR("Failed to parse lxc-user-nic output");
811ef482 2589 return -1;
74c6e2b0 2590 }
811ef482 2591
e389f2af
CB
2592 /*
2593 * lxc-user-nic will take care of proper network device naming. So
2594 * netdev->name and netdev->created_name need to be identical to not
2595 * trigger another rename later on.
2596 */
2597 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2598 if (retlen < IFNAMSIZ)
2599 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2600 if (retlen >= IFNAMSIZ) {
2601 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2602 return -E2BIG;
2603 }
811ef482 2604
74c6e2b0 2605 /* netdev->ifindex */
811ef482 2606 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2607 if (!token) {
2608 ERROR("Failed to parse lxc-user-nic output");
811ef482 2609 return -1;
74c6e2b0 2610 }
811ef482 2611
74c6e2b0
CB
2612 ret = lxc_safe_int(token, &netdev->ifindex);
2613 if (ret < 0) {
6d1400b5 2614 errno = -ret;
2615 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2616 return -1;
2617 }
2618
74c6e2b0 2619 /* netdev->priv.veth_attr.veth1 */
811ef482 2620 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2621 if (!token) {
2622 ERROR("Failed to parse lxc-user-nic output");
811ef482 2623 return -1;
74c6e2b0 2624 }
811ef482 2625
94b1cade
DJ
2626 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2627 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2628 ERROR("Host side veth device name returned by lxc-user-nic is "
2629 "too long");
2630 return -E2BIG;
2631 }
74c6e2b0
CB
2632
2633 /* netdev->priv.veth_attr.ifindex */
2634 token = strtok_r(NULL, ":", &saveptr);
2635 if (!token) {
2636 ERROR("Failed to parse lxc-user-nic output");
2637 return -1;
2638 }
2639
2640 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2641 if (ret < 0) {
6d1400b5 2642 errno = -ret;
2643 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2644 return -1;
2645 }
2646
4d781681 2647 if (netdev->upscript) {
2648 char *argv[] = {
2649 "veth",
2650 netdev->link,
2651 netdev->priv.veth_attr.veth1,
2652 NULL,
2653 };
2654
e389f2af
CB
2655 ret = run_script_argv(lxcname, hooks_version, "net",
2656 netdev->upscript, "up", argv);
4d781681 2657 if (ret < 0)
2658 return -1;
2659 }
2660
811ef482
CB
2661 return 0;
2662}
2663
f0ecc19d 2664static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2665 struct lxc_netdev *netdev,
2666 const char *netns_path)
811ef482
CB
2667{
2668 int bytes, ret;
2669 pid_t child;
2670 int pipefd[2];
419590da 2671 char buffer[PATH_MAX] = {0};
811ef482
CB
2672
2673 if (netdev->type != LXC_NET_VETH) {
2674 ERROR("Network type %d not support for unprivileged use", netdev->type);
2675 return -1;
2676 }
2677
2678 ret = pipe(pipefd);
2679 if (ret < 0) {
2680 SYSERROR("Failed to create pipe");
2681 return -1;
2682 }
2683
2684 child = fork();
2685 if (child < 0) {
2686 SYSERROR("Failed to create new process");
2687 close(pipefd[0]);
2688 close(pipefd[1]);
2689 return -1;
2690 }
2691
2692 if (child == 0) {
8843fde4 2693 char *hostveth;
811ef482
CB
2694
2695 close(pipefd[0]);
2696
2697 ret = dup2(pipefd[1], STDOUT_FILENO);
2698 if (ret >= 0)
2699 ret = dup2(pipefd[1], STDERR_FILENO);
2700 close(pipefd[1]);
2701 if (ret < 0) {
2702 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2703 _exit(EXIT_FAILURE);
811ef482
CB
2704 }
2705
8843fde4
CB
2706 if (netdev->priv.veth_attr.pair[0] != '\0')
2707 hostveth = netdev->priv.veth_attr.pair;
2708 else
2709 hostveth = netdev->priv.veth_attr.veth1;
2710 if (hostveth[0] == '\0') {
74c6e2b0 2711 SYSERROR("Host side veth device name is missing");
a30b9023 2712 _exit(EXIT_FAILURE);
74c6e2b0
CB
2713 }
2714
de4855a8 2715 if (netdev->link[0] == '\0') {
811ef482 2716 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2717 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2718 _exit(EXIT_FAILURE);
74c6e2b0 2719 }
811ef482 2720
811ef482 2721 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2722 lxcname, netns_path, netdev->link, hostveth);
811ef482 2723 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2724 lxcname, netns_path, "veth", netdev->link, hostveth,
2725 (char *)NULL);
811ef482 2726 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2727 _exit(EXIT_FAILURE);
811ef482
CB
2728 }
2729
2730 close(pipefd[1]);
2731
419590da 2732 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2733 if (bytes < 0) {
2734 SYSERROR("Failed to read from pipe file descriptor.");
2735 close(pipefd[0]);
6b9f82a9
CB
2736 } else {
2737 buffer[bytes - 1] = '\0';
811ef482 2738 }
811ef482 2739
6b9f82a9
CB
2740 ret = wait_for_pid(child);
2741 close(pipefd[0]);
2742 if (ret != 0 || bytes < 0) {
811ef482
CB
2743 ERROR("lxc-user-nic failed to delete requested network: %s",
2744 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2745 return -1;
2746 }
2747
811ef482
CB
2748 return 0;
2749}
2750
1bd8d726
CB
2751bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2752{
2753 int ret;
2754 struct lxc_list *iterator;
2755 struct lxc_list *network = &handler->conf->network;
2756 /* strlen("/proc/") = 6
2757 * +
8335fd40 2758 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2759 * +
2760 * strlen("/fd/") = 4
2761 * +
8335fd40 2762 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2763 * +
2764 * \0
2765 */
8335fd40 2766 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2767
2768 *netns_path = '\0';
2769
28d9e29e 2770 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2771 DEBUG("Cannot not guarantee safe deletion of network devices. "
2772 "Manual cleanup maybe needed");
2773 return false;
2774 }
2775
2776 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2777 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2778 if (ret < 0 || ret >= sizeof(netns_path))
2779 return false;
2780
2781 lxc_list_for_each(iterator, network) {
2782 char *hostveth = NULL;
2783 struct lxc_netdev *netdev = iterator->elem;
2784
2785 /* We can only delete devices whose ifindex we have. If we don't
2786 * have the index it means that we didn't create it.
2787 */
2788 if (!netdev->ifindex)
2789 continue;
2790
2791 if (netdev->type == LXC_NET_PHYS) {
2792 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2793 netdev->link);
2794 if (ret < 0)
2795 WARN("Failed to rename interface with index %d "
2796 "to its initial name \"%s\"",
2797 netdev->ifindex, netdev->link);
2798 else
2799 TRACE("Renamed interface with index %d to its "
2800 "initial name \"%s\"",
2801 netdev->ifindex, netdev->link);
b3259dc6
TP
2802
2803 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2804 goto clear_ifindices;
1bd8d726
CB
2805 }
2806
2807 ret = netdev_deconf[netdev->type](handler, netdev);
2808 if (ret < 0)
2809 WARN("Failed to deconfigure network device");
2810
2811 if (netdev->type != LXC_NET_VETH)
66a7c406 2812 goto clear_ifindices;
1bd8d726 2813
c869be20 2814 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2815 goto clear_ifindices;
1bd8d726 2816
8843fde4
CB
2817 if (netdev->priv.veth_attr.pair[0] != '\0')
2818 hostveth = netdev->priv.veth_attr.pair;
2819 else
2820 hostveth = netdev->priv.veth_attr.veth1;
2821 if (hostveth[0] == '\0')
66a7c406 2822 goto clear_ifindices;
8843fde4 2823
1bd8d726
CB
2824 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2825 handler->name, netdev,
2826 netns_path);
2827 if (ret < 0) {
1bd8d726 2828 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2829 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2830 goto clear_ifindices;
1bd8d726
CB
2831 }
2832 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2833 netdev->link);
66a7c406
CB
2834
2835clear_ifindices:
ad2ddfcd 2836 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
2837 * have cached stale data which would cause it to fail on reboot
2838 * we're we don't re-read the on-disk config file.
2839 */
2840 netdev->ifindex = 0;
2841 if (netdev->type == LXC_NET_PHYS) {
2842 netdev->priv.phys_attr.ifindex = 0;
2843 } else if (netdev->type == LXC_NET_VETH) {
2844 netdev->priv.veth_attr.veth1[0] = '\0';
2845 netdev->priv.veth_attr.ifindex = 0;
2846 }
1bd8d726
CB
2847 }
2848
bb84beda 2849 return true;
1bd8d726
CB
2850}
2851
/* Argument bundle passed through the void * callback parameter to the
 * "ip neigh ... proxy" exec wrappers.
 */
struct ip_proxy_args {
	const char *ip;  /* address argument placed after "proxy" */
	const char *dev; /* device argument placed after "dev" */
};
2856
2857static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2858{
2859 struct ip_proxy_args *args = data;
2860
2861 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2862 return -1;
2863}
2864
2865static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2866{
2867 struct ip_proxy_args *args = data;
2868
2869 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2870 return -1;
2871}
2872
2873static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2874{
2875 int ret;
2876 char cmd_output[PATH_MAX];
2877 struct ip_proxy_args args = {
2878 .ip = ip,
2879 .dev = dev,
2880 };
2881
2882 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2883 if (ret < 0) {
2884 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2885 return -1;
2886 }
2887
2888 return 0;
2889}
2890
2891static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2892{
2893 int ret;
2894 char cmd_output[PATH_MAX];
2895 struct ip_proxy_args args = {
2896 .ip = ip,
2897 .dev = dev,
2898 };
2899
2900 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2901 if (ret < 0) {
2902 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2903 return -1;
2904 }
2905
2906 return 0;
2907}
2908
2909static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2910 struct lxc_list *cur, *next;
2911 struct lxc_inetdev *inet4dev;
2912 struct lxc_inet6dev *inet6dev;
2913 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2914 int err = 0;
2915 unsigned int lo_ifindex = 0;
6509154d 2916
2917 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2918 if (!lxc_list_empty(&netdev->ipv4)) {
2919 /* Check for net.ipv4.conf.[link].forwarding=1 */
2920 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2921 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2922 return minus_one_set_errno(EINVAL);
2923 }
2924 }
2925
2926 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2927 if (!lxc_list_empty(&netdev->ipv6)) {
2928 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2929 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2930 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2931 return minus_one_set_errno(EINVAL);
2932 }
2933
2934 /* Check for net.ipv6.conf.[link].forwarding=1 */
2935 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2936 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2937 return minus_one_set_errno(EINVAL);
2938 }
2939 }
2940
b670016a 2941 /* Perform IPVLAN specific checks. */
2942 if (netdev->type == LXC_NET_IPVLAN) {
2943 /* Check mode is l3s as other modes do not work with l2proxy. */
2944 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2945 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2946 return minus_one_set_errno(EINVAL);
2947 }
2948
2949 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 2950 lo_ifindex = if_nametoindex(loop_device);
b670016a 2951 if (lo_ifindex == 0) {
3ebffb98 2952 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 2953 return minus_one_set_errno(EINVAL);
2954 }
2955 }
2956
6509154d 2957 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2958 inet4dev = cur->elem;
2959 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2960 return minus_one_set_errno(-errno);
2961
2962 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2963 return minus_one_set_errno(EINVAL);
b670016a 2964
2965 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2966 if (netdev->type == LXC_NET_IPVLAN) {
2967 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2968 if (err < 0) {
3ebffb98 2969 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 2970 return minus_one_set_errno(-err);
2971 }
2972 }
6509154d 2973 }
2974
2975 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2976 inet6dev = cur->elem;
2977 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2978 return minus_one_set_errno(-errno);
2979
2980 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2981 return minus_one_set_errno(EINVAL);
b670016a 2982
2983 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2984 if (netdev->type == LXC_NET_IPVLAN) {
2985 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2986 if (err < 0) {
3ebffb98 2987 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 2988 return minus_one_set_errno(-err);
2989 }
2990 }
6509154d 2991 }
2992
2993 return 0;
2994}
2995
b670016a 2996static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
2997 char bufinet4[INET_ADDRSTRLEN];
2998 unsigned int errCount = 0;
2999
3000 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4))) {
3001 SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
3002 return minus_one_set_errno(EINVAL);
3003 }
3004
3005 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3006 if (lo_ifindex > 0) {
3007 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
3008 errCount++;
3009 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
3010 }
3011 }
3012
3013 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3014 if (link[0] != '\0') {
3015 if (lxc_del_ip_neigh_proxy(bufinet4, link) < 0)
3016 errCount++;
3017 }
3018
3019 if (errCount > 0)
3020 return minus_one_set_errno(EINVAL);
3021
3022 return 0;
3023}
3024
3025static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
3026 char bufinet6[INET6_ADDRSTRLEN];
3027 unsigned int errCount = 0;
3028
3029 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6))) {
3030 SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
3031 return minus_one_set_errno(EINVAL);
3032 }
3033
3034 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3035 if (lo_ifindex > 0) {
3036 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
3037 errCount++;
3038 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3039 }
3040 }
3041
3042 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3043 if (link[0] != '\0') {
3044 if (lxc_del_ip_neigh_proxy(bufinet6, link) < 0)
3045 errCount++;
3046 }
3047
3048 if (errCount > 0)
3049 return minus_one_set_errno(EINVAL);
3050
3051 return 0;
3052}
3053
6509154d 3054static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3055 unsigned int lo_ifindex = 0;
3056 unsigned int errCount = 0;
6509154d 3057 struct lxc_list *cur, *next;
3058 struct lxc_inetdev *inet4dev;
3059 struct lxc_inet6dev *inet6dev;
6509154d 3060
b670016a 3061 /* Perform IPVLAN specific checks. */
3062 if (netdev->type == LXC_NET_IPVLAN) {
3063 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3064 lo_ifindex = if_nametoindex(loop_device);
b670016a 3065 if (lo_ifindex == 0) {
3066 errCount++;
3ebffb98 3067 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3068 }
b670016a 3069 }
6509154d 3070
b670016a 3071 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3072 inet4dev = cur->elem;
3073 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3074 errCount++;
6509154d 3075 }
3076
3077 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3078 inet6dev = cur->elem;
b670016a 3079 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3080 errCount++;
6509154d 3081 }
3082
b670016a 3083 if (errCount > 0)
6509154d 3084 return minus_one_set_errno(EINVAL);
3085
3086 return 0;
3087}
3088
e389f2af 3089static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3090{
811ef482
CB
3091 struct lxc_list *iterator;
3092 struct lxc_list *network = &handler->conf->network;
3093
811ef482
CB
3094 lxc_list_for_each(iterator, network) {
3095 struct lxc_netdev *netdev = iterator->elem;
3096
3097 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3098 ERROR("Invalid network configuration type %d", netdev->type);
3099 return -1;
3100 }
3101
6509154d 3102 /* Setup l2proxy entries if enabled and used with a link property */
3103 if (netdev->l2proxy && netdev->link[0] != '\0') {
3104 if (lxc_setup_l2proxy(netdev)) {
3105 ERROR("Failed to setup l2proxy");
3106 return -1;
3107 }
3108 }
3109
811ef482
CB
3110 if (netdev_conf[netdev->type](handler, netdev)) {
3111 ERROR("Failed to create network device");
3112 return -1;
3113 }
811ef482
CB
3114 }
3115
3116 return 0;
3117}
3118
e389f2af 3119int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3120{
e389f2af
CB
3121 pid_t pid = handler->pid;
3122 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3123 struct lxc_list *iterator;
3124
e0010464 3125 if (am_guest_unpriv())
74c6e2b0 3126 return 0;
811ef482
CB
3127
3128 lxc_list_for_each(iterator, network) {
e389f2af
CB
3129 int ret;
3130 char ifname[IFNAMSIZ];
811ef482
CB
3131 struct lxc_netdev *netdev = iterator->elem;
3132
811ef482
CB
3133 if (!netdev->ifindex)
3134 continue;
3135
3136 /* retrieve the name of the interface */
3137 if (!if_indextoname(netdev->ifindex, ifname)) {
3138 ERROR("No interface corresponding to ifindex \"%d\"",
3139 netdev->ifindex);
3140 return -1;
3141 }
3142
535e8859
CB
3143 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3144 if (ret) {
6d1400b5 3145 errno = -ret;
e389f2af
CB
3146 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3147 ifname, pid);
811ef482
CB
3148 return -1;
3149 }
3150
e389f2af
CB
3151 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3152
3153 DEBUG("Moved network device \"%s\" to network namespace of %d",
3154 netdev->created_name, pid);
811ef482
CB
3155 }
3156
3157 return 0;
3158}
3159
e389f2af 3160static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3161{
e389f2af
CB
3162 int hooks_version = handler->conf->hooks_version;
3163 const char *lxcname = handler->name;
3164 const char *lxcpath = handler->lxcpath;
3165 struct lxc_list *network = &handler->conf->network;
3166 pid_t pid = handler->pid;
74c6e2b0
CB
3167 struct lxc_list *iterator;
3168
74c6e2b0
CB
3169 lxc_list_for_each(iterator, network) {
3170 struct lxc_netdev *netdev = iterator->elem;
3171
3172 if (netdev->type == LXC_NET_EMPTY)
3173 continue;
3174
3175 if (netdev->type == LXC_NET_NONE)
3176 continue;
3177
3178 if (netdev->type != LXC_NET_VETH) {
e389f2af 3179 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3180 lxc_net_type_to_str(netdev->type));
3181 return -1;
3182 }
3183
3184 if (netdev->mtu)
3185 INFO("mtu ignored due to insufficient privilege");
3186
e389f2af
CB
3187 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3188 pid, hooks_version))
74c6e2b0
CB
3189 return -1;
3190 }
3191
3192 return 0;
3193}
3194
1bd8d726 3195bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3196{
3197 int ret;
3198 struct lxc_list *iterator;
3199 struct lxc_list *network = &handler->conf->network;
1bd8d726 3200
811ef482
CB
3201 lxc_list_for_each(iterator, network) {
3202 char *hostveth = NULL;
3203 struct lxc_netdev *netdev = iterator->elem;
3204
3205 /* We can only delete devices whose ifindex we have. If we don't
3206 * have the index it means that we didn't create it.
3207 */
3208 if (!netdev->ifindex)
3209 continue;
3210
6509154d 3211 /* Delete l2proxy entries if enabled and used with a link property */
3212 if (netdev->l2proxy && netdev->link[0] != '\0') {
3213 if (lxc_delete_l2proxy(netdev))
3214 WARN("Failed to delete all l2proxy config");
3215 /* Don't return, let the network be cleaned up as normal. */
3216 }
3217
811ef482
CB
3218 if (netdev->type == LXC_NET_PHYS) {
3219 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3220 if (ret < 0)
3221 WARN("Failed to rename interface with index %d "
b809f232
CB
3222 "from \"%s\" to its initial name \"%s\"",
3223 netdev->ifindex, netdev->name, netdev->link);
0b154989 3224 else {
29589196
CB
3225 TRACE("Renamed interface with index %d from "
3226 "\"%s\" to its initial name \"%s\"",
3227 netdev->ifindex, netdev->name,
3228 netdev->link);
0b154989
TP
3229
3230 /* Restore original MTU */
3231 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3232 if (ret < 0) {
3233 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3234 netdev->link, netdev->priv.phys_attr.mtu);
3235 } else {
3236 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3237 netdev->link, netdev->priv.phys_attr.mtu);
3238 }
3239 }
b3259dc6
TP
3240
3241 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3242 goto clear_ifindices;
811ef482
CB
3243 }
3244
3245 ret = netdev_deconf[netdev->type](handler, netdev);
3246 if (ret < 0)
3247 WARN("Failed to deconfigure network device");
3248
3249 /* Recent kernels remove the virtual interfaces when the network
3250 * namespace is destroyed but in case we did not move the
3251 * interface to the network namespace, we have to destroy it.
3252 */
1bd8d726 3253 ret = lxc_netdev_delete_by_index(netdev->ifindex);
78ab281c
CB
3254 if (ret < 0) {
3255 if (errno != ENODEV) {
3256 WARN("Failed to remove interface \"%s\" with index %d",
3257 netdev->name[0] != '\0' ? netdev->name : "(null)",
3258 netdev->ifindex);
3259 goto clear_ifindices;
3260 }
3261 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
24548539
CB
3262 netdev->name[0] != '\0' ? netdev->name : "(null)",
3263 netdev->ifindex);
811ef482 3264 }
1bd8d726 3265 INFO("Removed interface \"%s\" with index %d",
52845118
CB
3266 netdev->name[0] != '\0' ? netdev->name : "(null)",
3267 netdev->ifindex);
811ef482
CB
3268
3269 if (netdev->type != LXC_NET_VETH)
66a7c406 3270 goto clear_ifindices;
811ef482 3271
811ef482
CB
3272 /* Explicitly delete host veth device to prevent lingering
3273 * devices. We had issues in LXD around this.
3274 */
de4855a8 3275 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3276 hostveth = netdev->priv.veth_attr.pair;
3277 else
3278 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3279 if (hostveth[0] == '\0')
66a7c406 3280 goto clear_ifindices;
811ef482
CB
3281
3282 ret = lxc_netdev_delete_by_name(hostveth);
3283 if (ret < 0) {
24548539
CB
3284 WARN("Failed to remove interface \"%s\" from \"%s\"",
3285 hostveth, netdev->link);
66a7c406 3286 goto clear_ifindices;
811ef482
CB
3287 }
3288 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3289
c869be20 3290 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3291 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3292 netdev->ifindex = 0;
3293 netdev->priv.veth_attr.ifindex = 0;
3294 goto clear_ifindices;
811ef482
CB
3295 }
3296
3297 /* Delete the openvswitch port. */
3298 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3299 if (ret < 0)
3300 WARN("Failed to remove port \"%s\" from openvswitch "
3301 "bridge \"%s\"", hostveth, netdev->link);
3302 else
3303 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3304 hostveth, netdev->link);
3305
66a7c406 3306clear_ifindices:
ad2ddfcd 3307 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3308 * have cached stale data which would cause it to fail on reboot
3309 * where we don't re-read the on-disk config file.
3310 */
3311 netdev->ifindex = 0;
3312 if (netdev->type == LXC_NET_PHYS) {
3313 netdev->priv.phys_attr.ifindex = 0;
3314 } else if (netdev->type == LXC_NET_VETH) {
3315 netdev->priv.veth_attr.veth1[0] = '\0';
3316 netdev->priv.veth_attr.ifindex = 0;
3317 }
811ef482
CB
3318 }
3319
bb84beda 3320 return true;
811ef482
CB
3321}
3322
3323int lxc_requests_empty_network(struct lxc_handler *handler)
3324{
3325 struct lxc_list *network = &handler->conf->network;
3326 struct lxc_list *iterator;
3327 bool found_none = false, found_nic = false;
3328
3329 if (lxc_list_empty(network))
3330 return 0;
3331
3332 lxc_list_for_each(iterator, network) {
3333 struct lxc_netdev *netdev = iterator->elem;
3334
3335 if (netdev->type == LXC_NET_NONE)
3336 found_none = true;
3337 else
3338 found_nic = true;
3339 }
3340 if (found_none && !found_nic)
3341 return 1;
3342 return 0;
3343}
3344
3345/* try to move physical nics to the init netns */
b809f232 3346int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3347{
3348 int ret;
b809f232 3349 int oldfd;
811ef482 3350 char ifname[IFNAMSIZ];
b809f232 3351 struct lxc_list *iterator;
28d9e29e 3352 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3353 struct lxc_conf *conf = handler->conf;
811ef482 3354
b809f232
CB
3355 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3356 * the parent network namespace. We won't have this capability if we are
3357 * unprivileged.
3358 */
d0fbc7ba 3359 if (!handler->am_root)
b809f232 3360 return 0;
811ef482 3361
b809f232 3362 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3363
0037ab49 3364 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3365 if (oldfd < 0) {
3366 SYSERROR("Failed to preserve network namespace");
b809f232 3367 return -1;
811ef482
CB
3368 }
3369
b809f232 3370 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3371 if (ret < 0) {
3372 SYSERROR("Failed to enter network namespace");
3373 close(oldfd);
b809f232 3374 return -1;
811ef482
CB
3375 }
3376
b809f232
CB
3377 lxc_list_for_each(iterator, &conf->network) {
3378 struct lxc_netdev *netdev = iterator->elem;
811ef482 3379
b809f232
CB
3380 if (netdev->type != LXC_NET_PHYS)
3381 continue;
3382
3383 /* Retrieve the name of the interface in the container's network
3384 * namespace.
3385 */
3386 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3387 WARN("No interface corresponding to ifindex %d",
b809f232 3388 netdev->ifindex);
811ef482
CB
3389 continue;
3390 }
b809f232 3391
0037ab49 3392 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3393 if (ret < 0)
811ef482
CB
3394 WARN("Error moving network device \"%s\" back to "
3395 "network namespace", ifname);
b809f232
CB
3396 else
3397 TRACE("Moved network device \"%s\" back to network "
3398 "namespace", ifname);
811ef482 3399 }
811ef482 3400
b809f232 3401 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3402 close(oldfd);
b809f232
CB
3403 if (ret < 0) {
3404 SYSERROR("Failed to enter network namespace");
3405 return -1;
3406 }
3407
3408 return 0;
811ef482
CB
3409}
3410
3411static int setup_hw_addr(char *hwaddr, const char *ifname)
3412{
3413 struct sockaddr sockaddr;
3414 struct ifreq ifr;
6d1400b5 3415 int ret, fd;
811ef482
CB
3416
3417 ret = lxc_convert_mac(hwaddr, &sockaddr);
3418 if (ret) {
6d1400b5 3419 errno = -ret;
3420 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3421 return -1;
3422 }
3423
3424 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3425 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3426 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3427
ad9429e5 3428 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3429 if (fd < 0)
3430 return -1;
3431
3432 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3433 if (ret)
6d1400b5 3434 SYSERROR("Failed to perform ioctl");
3435
3436 close(fd);
811ef482
CB
3437
3438 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3439 ifr.ifr_name);
3440
3441 return ret;
3442}
3443
3444static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3445{
3446 struct lxc_list *iterator;
3447 int err;
3448
3449 lxc_list_for_each(iterator, ip) {
3450 struct lxc_inetdev *inetdev = iterator->elem;
3451
3452 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3453 &inetdev->bcast, inetdev->prefix);
3454 if (err) {
6d1400b5 3455 errno = -err;
3456 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3457 "with ifindex %d", ifindex);
811ef482
CB
3458 return -1;
3459 }
3460 }
3461
3462 return 0;
3463}
3464
3465static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3466{
3467 struct lxc_list *iterator;
3468 int err;
3469
3470 lxc_list_for_each(iterator, ip) {
3471 struct lxc_inet6dev *inet6dev = iterator->elem;
3472
3473 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3474 &inet6dev->mcast, &inet6dev->acast,
3475 inet6dev->prefix);
3476 if (err) {
6d1400b5 3477 errno = -err;
3478 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3479 "with ifindex %d", ifindex);
811ef482
CB
3480 return -1;
3481 }
3482 }
3483
3484 return 0;
3485}
3486
3487static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3488{
3489 char ifname[IFNAMSIZ];
3490 int err;
811ef482 3491 char *current_ifname = ifname;
009d6127 3492 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3493
3494 /* empty network namespace */
3495 if (!netdev->ifindex) {
3496 if (netdev->flags & IFF_UP) {
3497 err = lxc_netdev_up("lo");
3498 if (err) {
6d1400b5 3499 errno = -err;
3500 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3501 return -1;
3502 }
3503 }
3504
3505 if (netdev->type == LXC_NET_EMPTY)
3506 return 0;
3507
3508 if (netdev->type == LXC_NET_NONE)
3509 return 0;
3510
e389f2af
CB
3511 netdev->ifindex = if_nametoindex(netdev->created_name);
3512 if (!netdev->ifindex)
3513 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3514 netdev->name ?: "(null)");
811ef482
CB
3515 }
3516
3517 /* get the new ifindex in case of physical netdev */
3518 if (netdev->type == LXC_NET_PHYS) {
3519 netdev->ifindex = if_nametoindex(netdev->link);
3520 if (!netdev->ifindex) {
3521 ERROR("Failed to get ifindex for network device \"%s\"",
3522 netdev->link);
3523 return -1;
3524 }
3525 }
3526
3527 /* retrieve the name of the interface */
3528 if (!if_indextoname(netdev->ifindex, current_ifname)) {
e389f2af
CB
3529 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3530 netdev->ifindex);
811ef482
CB
3531 return -1;
3532 }
3533
e389f2af 3534 /* Default: let the system choose an interface name.
811ef482
CB
3535 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3536 * netlink will replace the format specifier with an appropriate index.
3537 */
de4855a8
CB
3538 if (netdev->name[0] == '\0') {
3539 if (netdev->type == LXC_NET_PHYS)
94b1cade 3540 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3541 else
94b1cade 3542 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3543 }
811ef482
CB
3544
3545 /* rename the interface name */
e389f2af
CB
3546 if (strcmp(current_ifname, netdev->name) != 0) {
3547 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
811ef482 3548 if (err) {
6d1400b5 3549 errno = -err;
3550 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
e389f2af 3551 current_ifname, netdev->name);
811ef482
CB
3552 return -1;
3553 }
e389f2af
CB
3554
3555 TRACE("Renamed network device from \"%s\" to \"%s\"",
3556 current_ifname, netdev->name);
811ef482
CB
3557 }
3558
3559 /* Re-read the name of the interface because its name has changed
3560 * and would be automatically allocated by the system
3561 */
3562 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3563 ERROR("Failed get name for network device with ifindex %d",
3564 netdev->ifindex);
3565 return -1;
3566 }
3567
790255cf
CB
3568 /* Now update the recorded name of the network device to reflect the
3569 * name of the network device in the child's network namespace. We will
3570 * later on send this information back to the parent.
3571 */
94b1cade 3572 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3573
811ef482
CB
3574 /* set a mac address */
3575 if (netdev->hwaddr) {
3576 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3577 ERROR("Failed to setup hw address for network device \"%s\"",
3578 current_ifname);
3579 return -1;
3580 }
3581 }
3582
3583 /* setup ipv4 addresses on the interface */
3584 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3585 ERROR("Failed to setup ip addresses for network device \"%s\"",
e389f2af 3586 current_ifname);
811ef482
CB
3587 return -1;
3588 }
3589
3590 /* setup ipv6 addresses on the interface */
3591 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3592 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
e389f2af 3593 current_ifname);
811ef482
CB
3594 return -1;
3595 }
3596
3597 /* set the network device up */
3598 if (netdev->flags & IFF_UP) {
811ef482
CB
3599 err = lxc_netdev_up(current_ifname);
3600 if (err) {
6d1400b5 3601 errno = -err;
3602 SYSERROR("Failed to set network device \"%s\" up",
3603 current_ifname);
811ef482
CB
3604 return -1;
3605 }
3606
3607 /* the network is up, make the loopback up too */
3608 err = lxc_netdev_up("lo");
3609 if (err) {
6d1400b5 3610 errno = -err;
3611 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3612 return -1;
3613 }
3614 }
3615
811ef482 3616 /* setup ipv4 gateway on the interface */
a2f9a670 3617 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3618 if (!(netdev->flags & IFF_UP)) {
3619 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3620 "\"%s\" when not bringing up the interface", current_ifname);
811ef482
CB
3621 return -1;
3622 }
3623
3624 if (lxc_list_empty(&netdev->ipv4)) {
3625 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3626 "\"%s\" when not assigning an address", current_ifname);
811ef482
CB
3627 return -1;
3628 }
3629
a2f9a670 3630 /* Setup device route if ipv4_gateway_dev is enabled */
3631 if (netdev->ipv4_gateway_dev) {
3632 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3633 if (err < 0) {
3634 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
e389f2af 3635 current_ifname);
a2f9a670 3636 return minus_one_set_errno(-err);
811ef482 3637 }
a2f9a670 3638 } else {
009d6127 3639 /* Check the gateway address is valid */
3640 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3641 return minus_one_set_errno(errno);
3642
3643 /* Try adding a default route to the gateway address */
811ef482 3644 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3645 if (err < 0) {
3646 /* If adding the default route fails, this could be because the
3647 * gateway address is in a different subnet to the container's address.
3648 * To work around this, we try adding a static device route to the
3649 * gateway address first, and then try again.
3650 */
a2f9a670 3651 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3652 if (err < 0) {
a2f9a670 3653 errno = -err;
009d6127 3654 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
e389f2af 3655 bufinet4, current_ifname);
009d6127 3656 return -1;
a2f9a670 3657 }
6d1400b5 3658
a2f9a670 3659 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3660 if (err < 0) {
a2f9a670 3661 errno = -err;
009d6127 3662 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
e389f2af 3663 bufinet4, current_ifname);
a2f9a670 3664 return -1;
811ef482 3665 }
811ef482
CB
3666 }
3667 }
3668 }
3669
3670 /* setup ipv6 gateway on the interface */
a2f9a670 3671 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3672 if (!(netdev->flags & IFF_UP)) {
e389f2af
CB
3673 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3674 current_ifname);
811ef482
CB
3675 return -1;
3676 }
3677
3678 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af
CB
3679 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3680 current_ifname);
811ef482
CB
3681 return -1;
3682 }
3683
a2f9a670 3684 /* Setup device route if ipv6_gateway_dev is enabled */
3685 if (netdev->ipv6_gateway_dev) {
3686 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3687 if (err < 0) {
3688 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
e389f2af 3689 current_ifname);
a2f9a670 3690 return minus_one_set_errno(-err);
811ef482 3691 }
a2f9a670 3692 } else {
009d6127 3693 /* Check the gateway address is valid */
3694 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3695 return minus_one_set_errno(errno);
3696
3697 /* Try adding a default route to the gateway address */
811ef482 3698 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3699 if (err < 0) {
3700 /* If adding the default route fails, this could be because the
3701 * gateway address is in a different subnet to the container's address.
3702 * To work around this, we try adding a static device route to the
3703 * gateway address first, and then try again.
3704 */
a2f9a670 3705 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3706 if (err < 0) {
a2f9a670 3707 errno = -err;
009d6127 3708 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
e389f2af 3709 bufinet6, current_ifname);
009d6127 3710 return -1;
a2f9a670 3711 }
6d1400b5 3712
a2f9a670 3713 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3714 if (err < 0) {
a2f9a670 3715 errno = -err;
009d6127 3716 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
e389f2af 3717 bufinet6, current_ifname);
a2f9a670 3718 return -1;
811ef482 3719 }
811ef482
CB
3720 }
3721 }
3722 }
3723
74c6e2b0 3724 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3725
3726 return 0;
3727}
3728
3729int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3730 struct lxc_list *network)
3731{
3732 struct lxc_list *iterator;
811ef482 3733
811ef482 3734 lxc_list_for_each(iterator, network) {
e389f2af 3735 struct lxc_netdev *netdev = iterator->elem;
811ef482 3736
811ef482 3737 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
e389f2af 3738 ERROR("Failed to setup netdev");
811ef482
CB
3739 return -1;
3740 }
3741 }
3742
3743 if (!lxc_list_empty(network))
e389f2af 3744 INFO("Network has been setup");
811ef482
CB
3745
3746 return 0;
3747}
7ab1ba02
CB
3748
3749int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3750{
3751 struct lxc_list *iterator;
3752 struct lxc_list *network = &handler->conf->network;
3753 int data_sock = handler->data_sock[0];
3754
7ab1ba02
CB
3755 lxc_list_for_each(iterator, network) {
3756 int ret;
3757 struct lxc_netdev *netdev = iterator->elem;
3758
3759 if (netdev->type != LXC_NET_VETH)
3760 continue;
3761
7fbb15ec 3762 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3763 if (ret < 0)
7ab1ba02 3764 return -1;
e389f2af
CB
3765
3766 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3767 if (ret < 0)
3768 return -1;
3769
3770 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3771 }
3772
3773 return 0;
3774}
3775
3776int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3777{
3778 struct lxc_list *iterator;
3779 struct lxc_list *network = &handler->conf->network;
3780 int data_sock = handler->data_sock[1];
3781
7ab1ba02
CB
3782 lxc_list_for_each(iterator, network) {
3783 int ret;
3784 struct lxc_netdev *netdev = iterator->elem;
3785
3786 if (netdev->type != LXC_NET_VETH)
3787 continue;
3788
e3233f26 3789 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3790 if (ret < 0)
7ab1ba02 3791 return -1;
e389f2af
CB
3792
3793 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3794 if (ret < 0)
3795 return -1;
3796 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3797 }
3798
3799 return 0;
3800}
a1ae535a
CB
3801
3802int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3803{
3804 struct lxc_list *iterator, *network;
3805 int data_sock = handler->data_sock[0];
3806
3807 if (!handler->am_root)
3808 return 0;
3809
3810 network = &handler->conf->network;
3811 lxc_list_for_each(iterator, network) {
3812 int ret;
3813 struct lxc_netdev *netdev = iterator->elem;
3814
3815 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3816 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3817 if (ret < 0)
7729f8e5 3818 return -1;
a1ae535a
CB
3819
3820 /* Send network device ifindex in the child's namespace to
3821 * parent.
3822 */
7fbb15ec 3823 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3824 if (ret < 0)
7729f8e5 3825 return -1;
a1ae535a
CB
3826 }
3827
e389f2af
CB
3828 if (!lxc_list_empty(network))
3829 TRACE("Sent network device names and ifindices to parent");
3830
a1ae535a 3831 return 0;
a1ae535a
CB
3832}
3833
3834int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3835{
3836 struct lxc_list *iterator, *network;
3837 int data_sock = handler->data_sock[1];
3838
3839 if (!handler->am_root)
3840 return 0;
3841
3842 network = &handler->conf->network;
3843 lxc_list_for_each(iterator, network) {
3844 int ret;
3845 struct lxc_netdev *netdev = iterator->elem;
3846
3847 /* Receive network device name in the child's namespace to
3848 * parent.
3849 */
e3233f26 3850 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3851 if (ret < 0)
7729f8e5 3852 return -1;
a1ae535a
CB
3853
3854 /* Receive network device ifindex in the child's namespace to
3855 * parent.
3856 */
e3233f26 3857 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3858 if (ret < 0)
7729f8e5 3859 return -1;
a1ae535a
CB
3860 }
3861
3862 return 0;
a1ae535a 3863}
bb84beda
CB
3864
3865void lxc_delete_network(struct lxc_handler *handler)
3866{
3867 bool bret;
3868
3869 if (handler->am_root)
3870 bret = lxc_delete_network_priv(handler);
3871 else
3872 bret = lxc_delete_network_unpriv(handler);
3873 if (!bret)
3874 DEBUG("Failed to delete network devices");
3875 else
3876 DEBUG("Deleted network devices");
3877}
1cd95214 3878
1cd95214
CB
3879int lxc_netns_set_nsid(int fd)
3880{
41a3300d 3881 int ret;
0ce60f0d
CB
3882 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3883 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3884 NLMSG_ALIGN(1024)];
1cd95214 3885 struct nl_handler nlh;
0ce60f0d
CB
3886 struct nlmsghdr *hdr;
3887 struct rtgenmsg *msg;
bfcedc7e 3888 int saved_errno;
9d036caa
CB
3889 const __s32 ns_id = -1;
3890 const __u32 netns_fd = fd;
1cd95214
CB
3891
3892 ret = netlink_open(&nlh, NETLINK_ROUTE);
3893 if (ret < 0)
41a3300d 3894 return -1;
1cd95214 3895
0ce60f0d 3896 memset(buf, 0, sizeof(buf));
6ce39620
CB
3897
3898#pragma GCC diagnostic push
3899#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3900 hdr = (struct nlmsghdr *)buf;
3901 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3902#pragma GCC diagnostic pop
1cd95214 3903
0ce60f0d
CB
3904 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3905 hdr->nlmsg_type = RTM_NEWNSID;
3906 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3907 hdr->nlmsg_pid = 0;
3908 hdr->nlmsg_seq = RTM_NEWNSID;
3909 msg->rtgen_family = AF_UNSPEC;
1cd95214 3910
9d036caa
CB
3911 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3912 if (ret < 0)
3913 goto on_error;
3914
3915 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3916 if (ret < 0)
3917 goto on_error;
1cd95214 3918
9fbbc427 3919 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3920
3921on_error:
bfcedc7e 3922 saved_errno = errno;
1cd95214 3923 netlink_close(&nlh);
bfcedc7e 3924 errno = saved_errno;
1cd95214 3925
9d036caa 3926 return ret;
1cd95214 3927}
938980ba
CB
3928
/* Index a sequence of routing attributes by attribute type.
 *
 * @tb:  output table with room for max + 1 pointers; cleared first.
 * @max: highest attribute type to record.
 * @rta: first attribute of the sequence.
 * @len: number of bytes available starting at @rta.
 *
 * For every type <= @max the first attribute of that type is stored in
 * tb[type]; later duplicates and larger types are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	while (RTA_OK(rta, len)) {
		unsigned short attr_type = rta->rta_type;

		/* Keep only the first occurrence of each in-range type. */
		if (attr_type <= max && tb[attr_type] == NULL)
			tb[attr_type] = rta;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
3948
/* Read the payload of routing attribute @rta as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than loaded through a casted
 * pointer, so the payload does not need to be suitably aligned for __s32.
 * The caller must ensure the attribute carries at least 4 bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3953
/* Pointer to the first routing attribute following a struct rtgenmsg at r,
 * i.e. r advanced by the netlink-aligned size of struct rtgenmsg.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3958
3959int lxc_netns_get_nsid(int fd)
3960{
3961 int ret;
3962 ssize_t len;
3963 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3964 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3965 NLMSG_ALIGN(1024)];
938980ba
CB
3966 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3967 struct nl_handler nlh;
3968 struct nlmsghdr *hdr;
3969 struct rtgenmsg *msg;
3970 int saved_errno;
3971 __u32 netns_fd = fd;
3972
3973 ret = netlink_open(&nlh, NETLINK_ROUTE);
3974 if (ret < 0)
3975 return -1;
3976
3977 memset(buf, 0, sizeof(buf));
6ce39620
CB
3978
3979#pragma GCC diagnostic push
3980#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3981 hdr = (struct nlmsghdr *)buf;
3982 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3983#pragma GCC diagnostic pop
938980ba
CB
3984
3985 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3986 hdr->nlmsg_type = RTM_GETNSID;
3987 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3988 hdr->nlmsg_pid = 0;
3989 hdr->nlmsg_seq = RTM_GETNSID;
3990 msg->rtgen_family = AF_UNSPEC;
3991
9d036caa
CB
3992 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3993 if (ret == 0)
3994 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 3995
938980ba
CB
3996 saved_errno = errno;
3997 netlink_close(&nlh);
3998 errno = saved_errno;
3999 if (ret < 0)
4000 return -1;
4001
9d036caa 4002 errno = EINVAL;
938980ba
CB
4003 msg = NLMSG_DATA(hdr);
4004 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4005 if (len < 0)
4006 return -1;
4007
6ce39620
CB
4008#pragma GCC diagnostic push
4009#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4010 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4011 if (tb[__LXC_NETNSA_NSID])
4012 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4013#pragma GCC diagnostic pop
938980ba
CB
4014
4015 return -1;
4016}
e389f2af
CB
4017
4018int lxc_create_network(struct lxc_handler *handler)
4019{
4020 int ret;
4021
4022 /*
4023 * Find gateway addresses from the link device, which is no longer
4024 * accessible inside the container. Do this before creating network
4025 * interfaces, since goto out_delete_net does not work before
4026 * lxc_clone.
4027 */
4028 ret = lxc_find_gateway_addresses(handler);
4029 if (ret) {
4030 ERROR("Failed to find gateway addresses");
4031 return -1;
4032 }
4033
4034 if (handler->am_root) {
4035 ret = lxc_create_network_priv(handler);
4036 if (ret)
4037 return -1;
4038
4039 return lxc_network_move_created_netdev_priv(handler);
4040 }
4041
4042 return lxc_create_network_unpriv(handler);
4043}