]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: record created_name for instantiate_ipvlan()
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
27#include <arpa/inet.h>
cb0dc11b
CB
28#include <ctype.h>
29#include <errno.h>
30#include <fcntl.h>
0ad19a3f 31#include <linux/netlink.h>
32#include <linux/rtnetlink.h>
33#include <linux/sockios.h>
cb0dc11b
CB
34#include <net/ethernet.h>
35#include <net/if.h>
36#include <net/if_arp.h>
37#include <netinet/in.h>
d38dd64a
CB
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
cb0dc11b
CB
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
d38dd64a
CB
47#include <time.h>
48#include <unistd.h>
f549edcc 49
d38dd64a 50#include "../include/netns_ifaddrs.h"
7ab1ba02 51#include "af_unix.h"
72d0e1cb 52#include "conf.h"
811ef482 53#include "config.h"
e3233f26 54#include "file_utils.h"
cb0dc11b 55#include "log.h"
8335fd40 56#include "macro.h"
95ea3d1f 57#include "memory_utils.h"
cb0dc11b
CB
58#include "network.h"
59#include "nl.h"
d7b58715 60#include "raw_syscalls.h"
59524108 61#include "syscall_wrappers.h"
0d204771 62#include "utils.h"
0ad19a3f 63
9de31d5a
CB
64#ifndef HAVE_STRLCPY
65#include "include/strlcpy.h"
66#endif
67
ac2cecc4 68lxc_log_define(network, lxc);
f8fee0e2 69
811ef482 70typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
3ebffb98 71static const char loop_device[] = "lo";
811ef482 72
b670016a 73static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 74{
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121}
122
/* Add an IPv4 route for dest/netmask on the device with index ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_NEWROUTE;

	return lxc_ip_route_dest(request_type, AF_INET, ifindex, dest, netmask);
}
127
/* Add an IPv6 route for dest/netmask on the device with index ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_NEWROUTE;

	return lxc_ip_route_dest(request_type, AF_INET6, ifindex, dest, netmask);
}
132
/* Delete the IPv4 route for dest/netmask on the device with index ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_DELROUTE;

	return lxc_ip_route_dest(request_type, AF_INET, ifindex, dest, netmask);
}
137
/* Delete the IPv6 route for dest/netmask on the device with index ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_DELROUTE;

	return lxc_ip_route_dest(request_type, AF_INET6, ifindex, dest, netmask);
}
142
d4a7da46 143static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144{
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160}
161
162static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163{
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179}
180
811ef482
CB
181static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182{
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
de4855a8 188 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
d34212ad
CB
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
811ef482
CB
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
6d1400b5 215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482
CB
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
6d1400b5 225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
227 goto out_delete;
228 }
229
8da62485
CB
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
811ef482
CB
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
de4855a8 253 } else if (netdev->link[0] != '\0') {
811ef482
CB
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
6d1400b5 268
811ef482 269 if (err) {
6d1400b5 270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
811ef482
CB
273 goto out_delete;
274 }
275 }
276
de4855a8 277 if (netdev->link[0] != '\0') {
811ef482
CB
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
6d1400b5 280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
811ef482
CB
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
6d1400b5 290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
292 goto out_delete;
293 }
294
d4a7da46 295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
811ef482 307 if (netdev->upscript) {
14a7b0f9
CB
308 char *argv[] = {
309 "veth",
310 netdev->link,
990b9ac3 311 veth1,
14a7b0f9
CB
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
811ef482
CB
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
811ef482
CB
330 return -1;
331}
332
333static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334{
8021de25 335 char peer[IFNAMSIZ];
811ef482 336 int err;
3bef7b7b 337 unsigned int mtu = 0;
811ef482 338
de4855a8 339 if (netdev->link[0] == '\0') {
811ef482
CB
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
8021de25
CB
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
346 return -1;
347
8021de25 348 if (!lxc_mkifname(peer))
811ef482
CB
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
6d1400b5 354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
966e9f1f 357 goto on_error;
811ef482
CB
358 }
359
a9704f05
CB
360 strlcpy(netdev->created_name, peer, IFNAMSIZ);
361
811ef482
CB
362 netdev->ifindex = if_nametoindex(peer);
363 if (!netdev->ifindex) {
364 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 365 goto on_error;
811ef482
CB
366 }
367
3bef7b7b
TP
368 if (netdev->mtu) {
369 err = lxc_safe_uint(netdev->mtu, &mtu);
370 if (err < 0) {
371 errno = -err;
372 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
373 goto on_error;
374 }
375
376 err = lxc_netdev_set_mtu(peer, mtu);
377 if (err < 0) {
378 errno = -err;
379 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
380 goto on_error;
381 }
382 }
383
811ef482 384 if (netdev->upscript) {
14a7b0f9
CB
385 char *argv[] = {
386 "macvlan",
387 netdev->link,
388 NULL,
389 };
390
391 err = run_script_argv(handler->name,
392 handler->conf->hooks_version, "net",
393 netdev->upscript, "up", argv);
394 if (err < 0)
966e9f1f 395 goto on_error;
811ef482
CB
396 }
397
398 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
399 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
400
401 return 0;
966e9f1f
CB
402
403on_error:
811ef482 404 lxc_netdev_delete_by_name(peer);
811ef482
CB
405 return -1;
406}
407
c9f52382 408static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
409{
410 int err, index, len;
411 struct ifinfomsg *ifi;
412 struct nl_handler nlh;
413 struct rtattr *nest, *nest2;
414 struct nlmsg *answer = NULL, *nlmsg = NULL;
415
416 len = strlen(master);
417 if (len == 1 || len >= IFNAMSIZ)
418 return minus_one_set_errno(EINVAL);
419
420 len = strlen(name);
421 if (len == 1 || len >= IFNAMSIZ)
422 return minus_one_set_errno(EINVAL);
423
424 index = if_nametoindex(master);
425 if (!index)
426 return minus_one_set_errno(EINVAL);
427
428 err = netlink_open(&nlh, NETLINK_ROUTE);
429 if (err)
430 return minus_one_set_errno(-err);
431
432 err = -ENOMEM;
433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
434 if (!nlmsg)
435 goto out;
436
437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
438 if (!answer)
439 goto out;
440
441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
443
444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
445 if (!ifi) {
446 goto out;
447 }
448 ifi->ifi_family = AF_UNSPEC;
449
450 err = -EPROTO;
451 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
452 if (!nest)
453 goto out;
454
455 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
456 goto out;
457
458 if (mode) {
459 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
460 if (!nest2)
461 goto out;
462
463 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
464 goto out;
465
466 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
467 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
468 */
469 if (isolation > 0) {
470 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
471 goto out;
472 }
473
474 nla_end_nested(nlmsg, nest2);
475 }
476
477 nla_end_nested(nlmsg, nest);
478
479 if (nla_put_u32(nlmsg, IFLA_LINK, index))
480 goto out;
481
482 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
483 goto out;
484
485 err = netlink_transaction(&nlh, nlmsg, answer);
486out:
487 netlink_close(&nlh);
488 nlmsg_free(answer);
489 nlmsg_free(nlmsg);
490 if (err < 0)
491 return minus_one_set_errno(-err);
492 return 0;
493}
494
495static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
496{
dd119206 497 char peer[IFNAMSIZ];
c9f52382 498 int err;
006e135e 499 unsigned int mtu = 0;
c9f52382 500
501 if (netdev->link[0] == '\0') {
502 ERROR("No link for ipvlan network device specified");
503 return -1;
504 }
505
dd119206
CB
506 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
507 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 508 return -1;
509
dd119206 510 if (!lxc_mkifname(peer))
c9f52382 511 return -1;
512
dd119206
CB
513 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
514 netdev->priv.ipvlan_attr.isolation);
c9f52382 515 if (err) {
dd119206
CB
516 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
517 peer, netdev->link);
c9f52382 518 goto on_error;
519 }
520
e7fdd504
CB
521 strlcpy(netdev->created_name, peer, IFNAMSIZ);
522
c9f52382 523 netdev->ifindex = if_nametoindex(peer);
524 if (!netdev->ifindex) {
525 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
526 goto on_error;
527 }
528
006e135e 529 if (netdev->mtu) {
530 err = lxc_safe_uint(netdev->mtu, &mtu);
531 if (err < 0) {
532 errno = -err;
dd119206
CB
533 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
534 netdev->mtu, peer);
006e135e 535 goto on_error;
536 }
537
538 err = lxc_netdev_set_mtu(peer, mtu);
539 if (err < 0) {
540 errno = -err;
dd119206
CB
541 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
542 netdev->mtu, peer);
006e135e 543 goto on_error;
544 }
545 }
546
c9f52382 547 if (netdev->upscript) {
548 char *argv[] = {
549 "ipvlan",
550 netdev->link,
551 NULL,
552 };
553
dd119206
CB
554 err = run_script_argv(handler->name, handler->conf->hooks_version,
555 "net", netdev->upscript, "up", argv);
c9f52382 556 if (err < 0)
557 goto on_error;
558 }
559
dd119206
CB
560 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
561 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 562
563 return 0;
564
565on_error:
566 lxc_netdev_delete_by_name(peer);
567 return -1;
568}
569
811ef482
CB
570static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
571{
572 char peer[IFNAMSIZ];
573 int err;
574 static uint16_t vlan_cntr = 0;
575 unsigned int mtu = 0;
576
de4855a8 577 if (netdev->link[0] == '\0') {
811ef482
CB
578 ERROR("No link for vlan network device specified");
579 return -1;
580 }
581
582 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
583 if (err < 0 || (size_t)err >= sizeof(peer))
584 return -1;
585
586 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
587 if (err) {
6d1400b5 588 errno = -err;
589 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
590 peer, netdev->link);
811ef482
CB
591 return -1;
592 }
593
594 netdev->ifindex = if_nametoindex(peer);
595 if (!netdev->ifindex) {
596 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 597 goto on_error;
598 }
599
600 if (netdev->mtu) {
601 err = lxc_safe_uint(netdev->mtu, &mtu);
602 if (err < 0) {
603 errno = -err;
604 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
605 goto on_error;
606 }
607
608 err = lxc_netdev_set_mtu(peer, mtu);
609 if (err) {
610 errno = -err;
611 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
612 goto on_error;
613 }
811ef482
CB
614 }
615
3a73d9f1 616 if (netdev->upscript) {
617 char *argv[] = {
618 "vlan",
619 netdev->link,
620 NULL,
621 };
622
623 err = run_script_argv(handler->name,
624 handler->conf->hooks_version, "net",
625 netdev->upscript, "up", argv);
19abca58 626 if (err < 0) {
3e2a7b08 627 goto on_error;
19abca58 628 }
3a73d9f1 629 }
630
3bef7b7b 631 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
811ef482 632 peer, netdev->ifindex);
811ef482
CB
633
634 return 0;
3e2a7b08 635
636on_error:
637 lxc_netdev_delete_by_name(peer);
638 return -1;
811ef482
CB
639}
640
641static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
642{
0b154989 643 int err, mtu_orig = 0;
3bef7b7b 644 unsigned int mtu = 0;
14a7b0f9 645
de4855a8 646 if (netdev->link[0] == '\0') {
811ef482
CB
647 ERROR("No link for physical interface specified");
648 return -1;
649 }
650
790255cf
CB
651 /* Note that we're retrieving the container's ifindex in the host's
652 * network namespace because we need it to move the device from the
653 * host's network namespace to the container's network namespace later
654 * on.
655 * Note that netdev->link will contain the name of the physical network
656 * device in the host's namespace.
657 */
811ef482
CB
658 netdev->ifindex = if_nametoindex(netdev->link);
659 if (!netdev->ifindex) {
660 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
661 return -1;
662 }
663
790255cf
CB
664 /* Store the ifindex of the host's network device in the host's
665 * namespace.
666 */
667 netdev->priv.phys_attr.ifindex = netdev->ifindex;
668
0b154989
TP
669 /* Get original device MTU setting and store for restoration after container shutdown. */
670 mtu_orig = netdev_get_mtu(netdev->ifindex);
671 if (mtu_orig < 0) {
672 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
673 return minus_one_set_errno(-mtu_orig);
674 }
675
676 netdev->priv.phys_attr.mtu = mtu_orig;
677
3bef7b7b
TP
678 if (netdev->mtu) {
679 err = lxc_safe_uint(netdev->mtu, &mtu);
680 if (err < 0) {
681 errno = -err;
682 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
683 return -1;
684 }
14a7b0f9 685
3bef7b7b
TP
686 err = lxc_netdev_set_mtu(netdev->link, mtu);
687 if (err < 0) {
688 errno = -err;
689 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
690 return -1;
691 }
692 }
693
694 if (netdev->upscript) {
695 char *argv[] = {
696 "phys",
697 netdev->link,
698 NULL,
699 };
700
701 err = run_script_argv(handler->name,
702 handler->conf->hooks_version, "net",
703 netdev->upscript, "up", argv);
704 if (err < 0) {
705 return -1;
706 }
707 }
708
709 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
811ef482
CB
710
711 return 0;
712}
713
714static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
715{
14a7b0f9
CB
716 int ret;
717 char *argv[] = {
718 "empty",
719 NULL,
720 };
721
811ef482 722 netdev->ifindex = 0;
14a7b0f9
CB
723 if (!netdev->upscript)
724 return 0;
725
726 ret = run_script_argv(handler->name, handler->conf->hooks_version,
727 "net", netdev->upscript, "up", argv);
728 if (ret < 0)
729 return -1;
730
811ef482
CB
731 return 0;
732}
733
734static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
735{
736 netdev->ifindex = 0;
737 return 0;
738}
739
740static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
741 [LXC_NET_VETH] = instantiate_veth,
742 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 743 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
744 [LXC_NET_VLAN] = instantiate_vlan,
745 [LXC_NET_PHYS] = instantiate_phys,
746 [LXC_NET_EMPTY] = instantiate_empty,
747 [LXC_NET_NONE] = instantiate_none,
748};
749
750static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
751{
14a7b0f9
CB
752 int ret;
753 char *argv[] = {
754 "veth",
755 netdev->link,
756 NULL,
757 NULL,
758 };
759
760 if (!netdev->downscript)
761 return 0;
811ef482 762
de4855a8 763 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 764 argv[2] = netdev->priv.veth_attr.pair;
811ef482 765 else
14a7b0f9
CB
766 argv[2] = netdev->priv.veth_attr.veth1;
767
768 ret = run_script_argv(handler->name,
769 handler->conf->hooks_version, "net",
770 netdev->downscript, "down", argv);
771 if (ret < 0)
772 return -1;
811ef482 773
811ef482
CB
774 return 0;
775}
776
777static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
778{
14a7b0f9
CB
779 int ret;
780 char *argv[] = {
781 "macvlan",
782 netdev->link,
783 NULL,
784 };
785
786 if (!netdev->downscript)
787 return 0;
788
789 ret = run_script_argv(handler->name, handler->conf->hooks_version,
790 "net", netdev->downscript, "down", argv);
791 if (ret < 0)
792 return -1;
811ef482 793
811ef482
CB
794 return 0;
795}
796
c9f52382 797static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
798{
799 int ret;
800 char *argv[] = {
801 "ipvlan",
802 netdev->link,
803 NULL,
804 };
805
806 if (!netdev->downscript)
807 return 0;
808
809 ret = run_script_argv(handler->name, handler->conf->hooks_version,
810 "net", netdev->downscript, "down", argv);
811 if (ret < 0)
812 return -1;
813
814 return 0;
815}
816
811ef482
CB
817static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
818{
3a73d9f1 819 int ret;
820 char *argv[] = {
821 "vlan",
822 netdev->link,
823 NULL,
824 };
825
826 if (!netdev->downscript)
827 return 0;
828
829 ret = run_script_argv(handler->name, handler->conf->hooks_version,
830 "net", netdev->downscript, "down", argv);
831 if (ret < 0)
832 return -1;
833
811ef482
CB
834 return 0;
835}
836
837static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
838{
14a7b0f9
CB
839 int ret;
840 char *argv[] = {
841 "phys",
842 netdev->link,
843 NULL,
844 };
845
846 if (!netdev->downscript)
847 return 0;
848
849 ret = run_script_argv(handler->name, handler->conf->hooks_version,
850 "net", netdev->downscript, "down", argv);
851 if (ret < 0)
852 return -1;
811ef482 853
811ef482
CB
854 return 0;
855}
856
857static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
858{
14a7b0f9
CB
859 int ret;
860 char *argv[] = {
861 "empty",
862 NULL,
863 };
864
865 if (!netdev->downscript)
866 return 0;
867
868 ret = run_script_argv(handler->name, handler->conf->hooks_version,
869 "net", netdev->downscript, "down", argv);
870 if (ret < 0)
871 return -1;
811ef482 872
811ef482
CB
873 return 0;
874}
875
/* "none" network type: nothing to tear down. Both arguments are unused.
 * Always returns 0.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
880
881static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
882 [LXC_NET_VETH] = shutdown_veth,
883 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 884 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
885 [LXC_NET_VLAN] = shutdown_vlan,
886 [LXC_NET_PHYS] = shutdown_phys,
887 [LXC_NET_EMPTY] = shutdown_empty,
888 [LXC_NET_NONE] = shutdown_none,
889};
890
0037ab49
TP
891static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
892{
893 int err;
894 struct nl_handler nlh;
895 struct ifinfomsg *ifi;
896 struct nlmsg *nlmsg = NULL;
897
898 err = netlink_open(&nlh, NETLINK_ROUTE);
899 if (err)
900 return err;
901
902 err = -ENOMEM;
903 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
904 if (!nlmsg)
905 goto out;
906
907 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
908 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
909
910 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
911 if (!ifi)
912 goto out;
913 ifi->ifi_family = AF_UNSPEC;
914 ifi->ifi_index = ifindex;
915
916 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
917 goto out;
918
919 if (ifname != NULL) {
920 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
921 goto out;
922 }
923
924 err = netlink_transaction(&nlh, nlmsg, nlmsg);
925out:
926 netlink_close(&nlh);
927 nlmsg_free(nlmsg);
928 return err;
929}
930
ebc73a67 931int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 932{
ebc73a67 933 int err;
0ad19a3f 934 struct nl_handler nlh;
06f976ca 935 struct ifinfomsg *ifi;
ebc73a67 936 struct nlmsg *nlmsg = NULL;
0ad19a3f 937
3cfc0f3a
MN
938 err = netlink_open(&nlh, NETLINK_ROUTE);
939 if (err)
940 return err;
0ad19a3f 941
3cfc0f3a 942 err = -ENOMEM;
0ad19a3f 943 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
944 if (!nlmsg)
945 goto out;
946
ebc73a67 947 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
948 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
949
950 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
951 if (!ifi)
952 goto out;
06f976ca
SZ
953 ifi->ifi_family = AF_UNSPEC;
954 ifi->ifi_index = ifindex;
0ad19a3f 955
956 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
957 goto out;
958
8d357196
DY
959 if (ifname != NULL) {
960 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
961 goto out;
962 }
963
3cfc0f3a 964 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 965out:
966 netlink_close(&nlh);
967 nlmsg_free(nlmsg);
968 return err;
969}
970
ebc73a67
CB
971/* If we are asked to move a wireless interface, then we must actually move its
972 * phyN device. Detect that condition and return the physname here. The physname
973 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
974 */
975#define PHYSNAME "/sys/class/net/%s/phy80211/name"
ebc73a67 976static char *is_wlan(const char *ifname)
e5848d39 977{
b0293710 978 __do_free char *path = NULL;
ebc73a67 979 int i, ret;
e5848d39 980 long physlen;
ebc73a67 981 size_t len;
e5848d39 982 FILE *f;
ebc73a67 983 char *physname = NULL;
e5848d39 984
ebc73a67 985 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 986 path = must_realloc(NULL, len + 1);
e5848d39 987 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 988 if (ret < 0 || (size_t)ret >= len)
e5848d39 989 goto bad;
ebc73a67 990
ebc73a67
CB
991 f = fopen(path, "r");
992 if (!f)
e5848d39 993 goto bad;
ebc73a67 994
1a0e70ac 995 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
996 fseek(f, 0, SEEK_END);
997 physlen = ftell(f);
998 fseek(f, 0, SEEK_SET);
7d1cde93
SX
999 if (physlen < 0) {
1000 fclose(f);
0382c0da 1001 goto bad;
7d1cde93 1002 }
ebc73a67
CB
1003
1004 physname = malloc(physlen + 1);
ee54ea9a 1005 if (!physname) {
acf47e1b 1006 fclose(f);
e5848d39 1007 goto bad;
ee54ea9a 1008 }
ebc73a67
CB
1009
1010 memset(physname, 0, physlen + 1);
e5848d39
SH
1011 ret = fread(physname, 1, physlen, f);
1012 fclose(f);
1013 if (ret < 0)
1014 goto bad;
1015
ebc73a67 1016 for (i = 0; i < physlen; i++) {
e5848d39
SH
1017 if (physname[i] == '\n')
1018 physname[i] = '\0';
ebc73a67 1019
e5848d39
SH
1020 if (physname[i] == '\0')
1021 break;
1022 }
1023
1024 return physname;
1025
1026bad:
f10fad2f 1027 free(physname);
e5848d39
SH
1028 return NULL;
1029}
1030
ebc73a67
CB
/* Rename the network device old to new inside the network namespace of the
 * process pid.
 *
 * The work is done in a forked child that switches to the target namespace
 * so that the calling process stays in its own namespace. The parent waits
 * for the child and returns the result of wait_for_pid(); -1 is returned
 * when fork() fails.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child process from here on: it must never return into the
	 * caller's code, otherwise it would keep running as a duplicate of
	 * the parent. Terminate it with a failure status instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1048
ebc73a67
CB
/* Move the wireless phy device physname into the network namespace of the
 * process pid by running "iw phy <physname> set netns <pid>", then, when
 * newname is non-NULL, rename ifname to newname inside that namespace.
 *
 * Takes ownership of physname and frees it on every path.
 * Returns 0 on success, -1 (or the rename helper's result) on failure.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	char *iw_path;
	pid_t child;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto out;
	/* Only the presence of iw matters; the resolved path is not used. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out;

	if (child == 0) {
		char pidstr[30];

		sprintf(pidstr, "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child) != 0)
		goto out;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out:
	free(physname);
	return ret;
}
1088
/* Move the network device ifname into the network namespace of the process
 * pid, renaming it to newname when newname is non-NULL.
 *
 * Devices for which is_wlan() returns a phy name are moved through
 * lxc_netdev_move_wlan(); all others are moved by ifindex.
 * Returns -EINVAL when ifname is NULL or cannot be resolved to an index.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (ifname == NULL)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (ifindex == 0)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (phy == NULL)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() frees phy. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1107
b84f58b9 1108int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1109{
b84f58b9 1110 int err;
ebc73a67
CB
1111 struct ifinfomsg *ifi;
1112 struct nl_handler nlh;
1113 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1114
3cfc0f3a
MN
1115 err = netlink_open(&nlh, NETLINK_ROUTE);
1116 if (err)
1117 return err;
0ad19a3f 1118
3cfc0f3a 1119 err = -ENOMEM;
0ad19a3f 1120 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1121 if (!nlmsg)
1122 goto out;
1123
06f976ca 1124 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1125 if (!answer)
1126 goto out;
1127
ebc73a67 1128 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1129 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1130
1131 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1132 if (!ifi)
1133 goto out;
06f976ca
SZ
1134 ifi->ifi_family = AF_UNSPEC;
1135 ifi->ifi_index = ifindex;
0ad19a3f 1136
3cfc0f3a 1137 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1138out:
1139 netlink_close(&nlh);
1140 nlmsg_free(answer);
1141 nlmsg_free(nlmsg);
1142 return err;
1143}
1144
b84f58b9
DL
/* Delete the network device called name.
 * Returns -EINVAL when name cannot be resolved to an interface index,
 * otherwise the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1155
1156int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1157{
ebc73a67 1158 int err, len;
06f976ca 1159 struct ifinfomsg *ifi;
ebc73a67
CB
1160 struct nl_handler nlh;
1161 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1162
3cfc0f3a
MN
1163 err = netlink_open(&nlh, NETLINK_ROUTE);
1164 if (err)
1165 return err;
b9a5bb58 1166
b84f58b9 1167 len = strlen(newname);
90d79629
CB
1168 if (len == 1 || len >= IFNAMSIZ) {
1169 err = -EINVAL;
b84f58b9 1170 goto out;
90d79629 1171 }
b84f58b9 1172
3cfc0f3a 1173 err = -ENOMEM;
b9a5bb58
DL
1174 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1175 if (!nlmsg)
1176 goto out;
1177
06f976ca 1178 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1179 if (!answer)
1180 goto out;
1181
ebc73a67 1182 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1183 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1184
1185 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1186 if (!ifi)
1187 goto out;
06f976ca
SZ
1188 ifi->ifi_family = AF_UNSPEC;
1189 ifi->ifi_index = ifindex;
b84f58b9
DL
1190
1191 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1192 goto out;
b9a5bb58 1193
3cfc0f3a 1194 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1195out:
1196 netlink_close(&nlh);
1197 nlmsg_free(answer);
1198 nlmsg_free(nlmsg);
1199 return err;
1200}
1201
b84f58b9
DL
1202int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1203{
1204 int len, index;
1205
1206 len = strlen(oldname);
dae3fdf6 1207 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1208 return -EINVAL;
1209
1210 index = if_nametoindex(oldname);
1211 if (!index)
1212 return -EINVAL;
1213
1214 return lxc_netdev_rename_by_index(index, newname);
1215}
1216
8befa924 1217int netdev_set_flag(const char *name, int flag)
0ad19a3f 1218{
ebc73a67 1219 int err, index, len;
06f976ca 1220 struct ifinfomsg *ifi;
ebc73a67
CB
1221 struct nl_handler nlh;
1222 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1223
3cfc0f3a
MN
1224 err = netlink_open(&nlh, NETLINK_ROUTE);
1225 if (err)
1226 return err;
0ad19a3f 1227
3cfc0f3a 1228 err = -EINVAL;
0ad19a3f 1229 len = strlen(name);
dae3fdf6 1230 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1231 goto out;
1232
3cfc0f3a 1233 err = -ENOMEM;
0ad19a3f 1234 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1235 if (!nlmsg)
1236 goto out;
1237
06f976ca 1238 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1239 if (!answer)
1240 goto out;
1241
3cfc0f3a 1242 err = -EINVAL;
0ad19a3f 1243 index = if_nametoindex(name);
1244 if (!index)
1245 goto out;
1246
ebc73a67 1247 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1248 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1249
1250 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1251 if (!ifi) {
1252 err = -ENOMEM;
1253 goto out;
1254 }
06f976ca
SZ
1255 ifi->ifi_family = AF_UNSPEC;
1256 ifi->ifi_index = index;
1257 ifi->ifi_change |= IFF_UP;
1258 ifi->ifi_flags |= flag;
0ad19a3f 1259
1260 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1261out:
1262 netlink_close(&nlh);
1263 nlmsg_free(nlmsg);
1264 nlmsg_free(answer);
1265 return err;
1266}
1267
ebc73a67 1268int netdev_get_flag(const char *name, int *flag)
efa1cf45 1269{
ebc73a67 1270 int err, index, len;
a4318300 1271 struct ifinfomsg *ifi;
ebc73a67
CB
1272 struct nl_handler nlh;
1273 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1274
1275 if (!name)
1276 return -EINVAL;
1277
1278 err = netlink_open(&nlh, NETLINK_ROUTE);
1279 if (err)
1280 return err;
1281
1282 err = -EINVAL;
1283 len = strlen(name);
1284 if (len == 1 || len >= IFNAMSIZ)
1285 goto out;
1286
1287 err = -ENOMEM;
1288 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1289 if (!nlmsg)
1290 goto out;
1291
06f976ca 1292 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1293 if (!answer)
1294 goto out;
1295
1296 err = -EINVAL;
1297 index = if_nametoindex(name);
1298 if (!index)
1299 goto out;
1300
06f976ca
SZ
1301 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1302 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1303
1304 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1305 if (!ifi) {
1306 err = -ENOMEM;
1307 goto out;
1308 }
06f976ca
SZ
1309 ifi->ifi_family = AF_UNSPEC;
1310 ifi->ifi_index = index;
efa1cf45
DY
1311
1312 err = netlink_transaction(&nlh, nlmsg, answer);
1313 if (err)
1314 goto out;
1315
06f976ca 1316 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1317
1318 *flag = ifi->ifi_flags;
1319out:
1320 netlink_close(&nlh);
1321 nlmsg_free(nlmsg);
1322 nlmsg_free(answer);
1323 return err;
1324}
1325
1326/*
1327 * \brief Check a interface is up or not.
1328 *
1329 * \param name: name for the interface.
1330 *
1331 * \return int.
1332 * 0 means interface is down.
1333 * 1 means interface is up.
1334 * Others means error happened, and ret-value is the error number.
1335 */
ebc73a67 1336int lxc_netdev_isup(const char *name)
efa1cf45 1337{
ebc73a67 1338 int err, flag;
efa1cf45
DY
1339
1340 err = netdev_get_flag(name, &flag);
1341 if (err)
ebc73a67
CB
1342 return err;
1343
efa1cf45
DY
1344 if (flag & IFF_UP)
1345 return 1;
ebc73a67 1346
efa1cf45 1347 return 0;
efa1cf45
DY
1348}
1349
0130df54
SH
1350int netdev_get_mtu(int ifindex)
1351{
ebc73a67 1352 int answer_len, err, res;
0130df54 1353 struct nl_handler nlh;
06f976ca 1354 struct ifinfomsg *ifi;
0130df54 1355 struct nlmsghdr *msg;
ebc73a67
CB
1356 int readmore = 0, recv_len = 0;
1357 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1358
1359 err = netlink_open(&nlh, NETLINK_ROUTE);
1360 if (err)
1361 return err;
1362
1363 err = -ENOMEM;
1364 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1365 if (!nlmsg)
1366 goto out;
1367
06f976ca 1368 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1369 if (!answer)
1370 goto out;
1371
1372 /* Save the answer buffer length, since it will be overwritten
1373 * on the first receive (and we might need to receive more than
ebc73a67
CB
1374 * once.
1375 */
06f976ca
SZ
1376 answer_len = answer->nlmsghdr->nlmsg_len;
1377
ebc73a67 1378 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1379 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1380
06f976ca 1381 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1382 if (!ifi)
1383 goto out;
06f976ca 1384 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1385
1386 /* Send the request for addresses, which returns all addresses
1387 * on all interfaces. */
1388 err = netlink_send(&nlh, nlmsg);
1389 if (err < 0)
1390 goto out;
1391
6ce39620
CB
1392#pragma GCC diagnostic push
1393#pragma GCC diagnostic ignored "-Wcast-align"
1394
0130df54
SH
1395 do {
1396 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1397 * overwritten by a previous receive.
1398 */
06f976ca 1399 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1400
1401 /* Get the (next) batch of reply messages */
1402 err = netlink_rcv(&nlh, answer);
1403 if (err < 0)
1404 goto out;
1405
1406 recv_len = err;
0130df54
SH
1407
1408 /* Satisfy the typing for the netlink macros */
06f976ca 1409 msg = answer->nlmsghdr;
0130df54
SH
1410
1411 while (NLMSG_OK(msg, recv_len)) {
1412
1413 /* Stop reading if we see an error message */
1414 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1415 struct nlmsgerr *errmsg =
1416 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1417 err = errmsg->error;
1418 goto out;
1419 }
1420
1421 /* Stop reading if we see a NLMSG_DONE message */
1422 if (msg->nlmsg_type == NLMSG_DONE) {
1423 readmore = 0;
1424 break;
1425 }
1426
06f976ca 1427 ifi = NLMSG_DATA(msg);
0130df54
SH
1428 if (ifi->ifi_index == ifindex) {
1429 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1430 int attr_len =
1431 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1432 res = 0;
ebc73a67
CB
1433 while (RTA_OK(rta, attr_len)) {
1434 /* Found a local address for the
1435 * requested interface, return it.
1436 */
0130df54 1437 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1438 memcpy(&res, RTA_DATA(rta),
1439 sizeof(int));
0130df54
SH
1440 err = res;
1441 goto out;
1442 }
1443 rta = RTA_NEXT(rta, attr_len);
1444 }
0130df54
SH
1445 }
1446
ebc73a67
CB
1447 /* Keep reading more data from the socket if the last
1448 * message had the NLF_F_MULTI flag set.
1449 */
0130df54
SH
1450 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1451
ebc73a67 1452 /* Look at the next message received in this buffer. */
0130df54
SH
1453 msg = NLMSG_NEXT(msg, recv_len);
1454 }
1455 } while (readmore);
1456
6ce39620
CB
1457#pragma GCC diagnostic pop
1458
ebc73a67 1459 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1460 err = -1;
1461
1462out:
1463 netlink_close(&nlh);
1464 nlmsg_free(answer);
1465 nlmsg_free(nlmsg);
1466 return err;
1467}
1468
d472214b 1469int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1470{
ebc73a67 1471 int err, index, len;
06f976ca 1472 struct ifinfomsg *ifi;
ebc73a67
CB
1473 struct nl_handler nlh;
1474 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1475
3cfc0f3a
MN
1476 err = netlink_open(&nlh, NETLINK_ROUTE);
1477 if (err)
1478 return err;
75d09f83 1479
3cfc0f3a 1480 err = -EINVAL;
75d09f83 1481 len = strlen(name);
dae3fdf6 1482 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1483 goto out;
1484
3cfc0f3a 1485 err = -ENOMEM;
75d09f83
DL
1486 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1487 if (!nlmsg)
1488 goto out;
1489
06f976ca 1490 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1491 if (!answer)
1492 goto out;
1493
3cfc0f3a 1494 err = -EINVAL;
75d09f83
DL
1495 index = if_nametoindex(name);
1496 if (!index)
1497 goto out;
1498
ebc73a67 1499 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1500 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1501
1502 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1503 if (!ifi) {
1504 err = -ENOMEM;
1505 goto out;
1506 }
06f976ca
SZ
1507 ifi->ifi_family = AF_UNSPEC;
1508 ifi->ifi_index = index;
75d09f83
DL
1509
1510 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1511 goto out;
1512
1513 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1514out:
1515 netlink_close(&nlh);
1516 nlmsg_free(nlmsg);
1517 nlmsg_free(answer);
1518 return err;
1519}
1520
d472214b 1521int lxc_netdev_up(const char *name)
0ad19a3f 1522{
d472214b 1523 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1524}
1525
/* Take the interface called name down by passing no flags to netdev_set_flag(). */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
1530
497353b6 1531int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1532{
ebc73a67 1533 int err, len;
06f976ca 1534 struct ifinfomsg *ifi;
ebc73a67 1535 struct nl_handler nlh;
0ad19a3f 1536 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1537 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1538
3cfc0f3a
MN
1539 err = netlink_open(&nlh, NETLINK_ROUTE);
1540 if (err)
1541 return err;
0ad19a3f 1542
3cfc0f3a 1543 err = -EINVAL;
0ad19a3f 1544 len = strlen(name1);
dae3fdf6 1545 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1546 goto out;
1547
1548 len = strlen(name2);
dae3fdf6 1549 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1550 goto out;
1551
3cfc0f3a 1552 err = -ENOMEM;
0ad19a3f 1553 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1554 if (!nlmsg)
1555 goto out;
1556
06f976ca 1557 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1558 if (!answer)
1559 goto out;
1560
06f976ca 1561 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1562 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1563 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1564
1565 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1566 if (!ifi)
1567 goto out;
06f976ca 1568 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1569
3cfc0f3a 1570 err = -EINVAL;
79e68309 1571 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1572 if (!nest1)
1573 goto out;
1574
1575 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1576 goto out;
1577
1578 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1579 if (!nest2)
1580 goto out;
1581
1582 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1583 if (!nest3)
1584 goto out;
1585
06f976ca 1586 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1587 if (!ifi) {
1588 err = -ENOMEM;
06f976ca 1589 goto out;
25a9939b 1590 }
0ad19a3f 1591
1592 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1593 goto out;
1594
1595 nla_end_nested(nlmsg, nest3);
0ad19a3f 1596 nla_end_nested(nlmsg, nest2);
0ad19a3f 1597 nla_end_nested(nlmsg, nest1);
1598
1599 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1600 goto out;
1601
3cfc0f3a 1602 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1603out:
1604 netlink_close(&nlh);
1605 nlmsg_free(answer);
1606 nlmsg_free(nlmsg);
1607 return err;
1608}
1609
ebc73a67 1610/* TODO: merge with lxc_macvlan_create */
7c11d57a 1611int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1612{
ebc73a67 1613 int err, len, lindex;
06f976ca 1614 struct ifinfomsg *ifi;
ebc73a67 1615 struct nl_handler nlh;
26c39028 1616 struct rtattr *nest, *nest2;
ebc73a67 1617 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1618
3cfc0f3a
MN
1619 err = netlink_open(&nlh, NETLINK_ROUTE);
1620 if (err)
1621 return err;
26c39028 1622
3cfc0f3a 1623 err = -EINVAL;
26c39028 1624 len = strlen(master);
dae3fdf6 1625 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1626 goto err3;
1627
1628 len = strlen(name);
dae3fdf6 1629 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1630 goto err3;
1631
3cfc0f3a 1632 err = -ENOMEM;
26c39028
JHS
1633 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1634 if (!nlmsg)
1635 goto err3;
1636
06f976ca 1637 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1638 if (!answer)
1639 goto err2;
1640
3cfc0f3a 1641 err = -EINVAL;
26c39028
JHS
1642 lindex = if_nametoindex(master);
1643 if (!lindex)
1644 goto err1;
1645
06f976ca 1646 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1647 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1648 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1649
1650 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1651 if (!ifi) {
1652 err = -ENOMEM;
1653 goto err1;
1654 }
06f976ca 1655 ifi->ifi_family = AF_UNSPEC;
26c39028 1656
79e68309 1657 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1658 if (!nest)
1659 goto err1;
1660
1661 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1662 goto err1;
1663
1664 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1665 if (!nest2)
1666 goto err1;
e892973e 1667
26c39028
JHS
1668 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1669 goto err1;
e892973e 1670
26c39028 1671 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1672 nla_end_nested(nlmsg, nest);
1673
1674 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1675 goto err1;
1676
1677 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1678 goto err1;
1679
3cfc0f3a 1680 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1681err1:
1682 nlmsg_free(answer);
1683err2:
1684 nlmsg_free(nlmsg);
1685err3:
1686 netlink_close(&nlh);
1687 return err;
1688}
1689
e892973e 1690int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1691{
ebc73a67 1692 int err, index, len;
06f976ca 1693 struct ifinfomsg *ifi;
ebc73a67 1694 struct nl_handler nlh;
e892973e 1695 struct rtattr *nest, *nest2;
ebc73a67 1696 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1697
3cfc0f3a
MN
1698 err = netlink_open(&nlh, NETLINK_ROUTE);
1699 if (err)
1700 return err;
0ad19a3f 1701
3cfc0f3a 1702 err = -EINVAL;
0ad19a3f 1703 len = strlen(master);
dae3fdf6 1704 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1705 goto out;
1706
1707 len = strlen(name);
dae3fdf6 1708 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1709 goto out;
1710
3cfc0f3a 1711 err = -ENOMEM;
0ad19a3f 1712 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1713 if (!nlmsg)
1714 goto out;
1715
06f976ca 1716 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1717 if (!answer)
1718 goto out;
1719
3cfc0f3a 1720 err = -EINVAL;
0ad19a3f 1721 index = if_nametoindex(master);
1722 if (!index)
1723 goto out;
1724
06f976ca 1725 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1726 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1727 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1728
1729 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1730 if (!ifi) {
1731 err = -ENOMEM;
1732 goto out;
1733 }
06f976ca 1734 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1735
79e68309 1736 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1737 if (!nest)
1738 goto out;
1739
1740 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1741 goto out;
1742
e892973e
DL
1743 if (mode) {
1744 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1745 if (!nest2)
1746 goto out;
1747
1748 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1749 goto out;
1750
1751 nla_end_nested(nlmsg, nest2);
1752 }
1753
0ad19a3f 1754 nla_end_nested(nlmsg, nest);
1755
1756 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1757 goto out;
1758
1759 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1760 goto out;
1761
3cfc0f3a 1762 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1763out:
1764 netlink_close(&nlh);
1765 nlmsg_free(answer);
1766 nlmsg_free(nlmsg);
1767 return err;
1768}
1769
/*
 * Write the string value (without its terminating NUL) to the file at path.
 *
 * Returns 0 on success, or -errno if the file cannot be opened for writing
 * or lxc_write_nointr() reports a failure.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int sysfd = open(path, O_WRONLY);

	if (sysfd < 0)
		return -errno;

	/* Capture errno before close() has a chance to change it. */
	if (lxc_write_nointr(sysfd, value, strlen(value)) < 0)
		ret = -errno;

	close(sysfd);
	return ret;
}
1785
6509154d 1786static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1787{
1788 int ret;
1789 char path[PATH_MAX];
1790 char buf[1] = "";
1791
1792 if (family != AF_INET && family != AF_INET6)
1793 return minus_one_set_errno(EINVAL);
1794
1795 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1796 family == AF_INET ? "ipv4" : "ipv6", ifname,
1797 "forwarding");
1798 if (ret < 0 || (size_t)ret >= PATH_MAX)
1799 return minus_one_set_errno(E2BIG);
1800
1801 return lxc_read_file_expect(path, buf, 1, "1");
1802}
1803
0ad19a3f 1804static int neigh_proxy_set(const char *ifname, int family, int flag)
1805{
9ba8130c 1806 int ret;
419590da 1807 char path[PATH_MAX];
0ad19a3f 1808
1809 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1810 return -EINVAL;
0ad19a3f 1811
419590da 1812 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1813 family == AF_INET ? "ipv4" : "ipv6", ifname,
1814 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 1815 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 1816 return -E2BIG;
0ad19a3f 1817
ebc73a67 1818 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1819}
1820
6509154d 1821static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1822{
1823 int ret;
1824 char path[PATH_MAX];
1825 char buf[1] = "";
1826
1827 if (family != AF_INET && family != AF_INET6)
1828 return minus_one_set_errno(EINVAL);
1829
1830 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1831 family == AF_INET ? "ipv4" : "ipv6", ifname,
1832 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1833 if (ret < 0 || (size_t)ret >= PATH_MAX)
1834 return minus_one_set_errno(E2BIG);
1835
1836 return lxc_read_file_expect(path, buf, 1, "1");
1837}
1838
/* Enable neighbour proxying on interface name for the given address family. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1843
/* Disable neighbour proxying on interface name for the given address family. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
1848
/* Return the value of the hexadecimal digit c, or -1 if c is not one. */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char;
	 * passing a negative plain char is undefined behaviour.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address macaddr into sockaddr.
 *
 * sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are
 * written to sockaddr->sa_data. Each octet is one or two hexadecimal digits
 * (either case); octets are separated by ':'. Parsing stops at the end of
 * the string or after ETH_ALEN octets, whichever comes first, so shorter
 * addresses leave the remaining bytes of sa_data untouched.
 *
 * Returns 0 on success and -EINVAL as soon as a character that is neither a
 * hexadecimal digit nor an expected ':' is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned val;
		int hi, lo;

		hi = mac_hex_value(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		lo = mac_hex_value(*macaddr);
		if (lo >= 0) {
			/* Two-digit octet. */
			val = ((unsigned)hi << 4) | (unsigned)lo;
			macaddr++;
		} else if (*macaddr == ':') {
			/* Single-digit octet followed by a separator. */
			val = (unsigned)hi;
			macaddr++;
		} else if (*macaddr == '\0') {
			/* Single-digit octet at the end of the string. */
			val = (unsigned)hi;
		} else {
			return -EINVAL;
		}

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1893
ebc73a67
CB
1894static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1895 void *acast, int prefix)
0ad19a3f 1896{
ebc73a67 1897 int addrlen, err;
06f976ca 1898 struct ifaddrmsg *ifa;
ebc73a67
CB
1899 struct nl_handler nlh;
1900 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1901
ebc73a67
CB
1902 addrlen = family == AF_INET ? sizeof(struct in_addr)
1903 : sizeof(struct in6_addr);
4bf1968d 1904
3cfc0f3a
MN
1905 err = netlink_open(&nlh, NETLINK_ROUTE);
1906 if (err)
1907 return err;
0ad19a3f 1908
3cfc0f3a 1909 err = -ENOMEM;
0ad19a3f 1910 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1911 if (!nlmsg)
1912 goto out;
1913
06f976ca 1914 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1915 if (!answer)
1916 goto out;
1917
06f976ca 1918 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1919 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1920 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1921
1922 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1923 if (!ifa)
25a9939b 1924 goto out;
06f976ca
SZ
1925 ifa->ifa_prefixlen = prefix;
1926 ifa->ifa_index = ifindex;
1927 ifa->ifa_family = family;
1928 ifa->ifa_scope = 0;
acf47e1b 1929
3cfc0f3a 1930 err = -EINVAL;
4bf1968d 1931 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1932 goto out;
1933
4bf1968d 1934 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1935 goto out;
1936
d8948a52 1937 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1938 goto out;
1939
ebc73a67 1940 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1941 err = -EPROTONOSUPPORT;
79881dc6
DL
1942 if (family == AF_INET6 &&
1943 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1944 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1945 goto out;
0ad19a3f 1946
3cfc0f3a 1947 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1948out:
1949 netlink_close(&nlh);
1950 nlmsg_free(answer);
1951 nlmsg_free(nlmsg);
1952 return err;
1953}
1954
/*
 * Add the IPv6 address addr with prefix length prefix to the interface with
 * index ifindex. mcast is forwarded in ip_addr_add()'s bcast position.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
1961
ebc73a67
CB
/*
 * Add the IPv4 address addr with broadcast address bcast and prefix length
 * prefix to the interface with index ifindex. No anycast address is passed.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
1967
ebc73a67
CB
/* Extract an IFA_LOCAL address (or an IFA_ADDRESS one if no IFA_LOCAL is
 * present) from the given RTM_NEWADDR message. If *res is NULL, memory for the
 * address is allocated and stored in *res (so res should be an in_addr** or
 * in6_addr**); an already set *res is overwritten in place.
 *
 * Returns 0 when the message was processed (including when its family does not
 * match and nothing was stored) and -1 on an unexpected attribute length or
 * allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addrmsg = NLMSG_DATA(msg);
	struct rtattr *attr;
	int addr_size, attr_bytes;

	if (addrmsg->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addr_size = sizeof(struct in_addr);
	else
		addr_size = sizeof(struct in6_addr);

	attr_bytes = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));

	/* Walk all rtattr's of this message. */
	for (attr = IFA_RTA(addrmsg); RTA_OK(attr, attr_bytes);
	     attr = RTA_NEXT(attr, attr_bytes)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the right
		 * length; verify it anyway.
		 */
		if (RTA_PAYLOAD(attr) != addr_size)
			return -1;

		/* An IFA_ADDRESS found earlier is overwritten by a later
		 * IFA_LOCAL, so only allocate once.
		 */
		if (*res == NULL) {
			*res = malloc(addr_size);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addr_size);

		/* IFA_LOCAL is the preferred answer, stop looking. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2021
19a26f82
MK
2022static int ip_addr_get(int family, int ifindex, void **res)
2023{
ebc73a67 2024 int answer_len, err;
06f976ca 2025 struct ifaddrmsg *ifa;
ebc73a67 2026 struct nl_handler nlh;
19a26f82 2027 struct nlmsghdr *msg;
ebc73a67
CB
2028 int readmore = 0, recv_len = 0;
2029 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2030
2031 err = netlink_open(&nlh, NETLINK_ROUTE);
2032 if (err)
2033 return err;
2034
2035 err = -ENOMEM;
2036 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2037 if (!nlmsg)
2038 goto out;
2039
06f976ca 2040 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2041 if (!answer)
2042 goto out;
2043
ebc73a67
CB
2044 /* Save the answer buffer length, since it will be overwritten on the
2045 * first receive (and we might need to receive more than once).
2046 */
06f976ca
SZ
2047 answer_len = answer->nlmsghdr->nlmsg_len;
2048
ebc73a67 2049 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2050 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2051
06f976ca 2052 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2053 if (!ifa)
2054 goto out;
06f976ca 2055 ifa->ifa_family = family;
19a26f82 2056
ebc73a67
CB
2057 /* Send the request for addresses, which returns all addresses on all
2058 * interfaces.
2059 */
19a26f82
MK
2060 err = netlink_send(&nlh, nlmsg);
2061 if (err < 0)
2062 goto out;
19a26f82 2063
6ce39620
CB
2064#pragma GCC diagnostic push
2065#pragma GCC diagnostic ignored "-Wcast-align"
2066
19a26f82
MK
2067 do {
2068 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2069 * overwritten by a previous receive.
2070 */
06f976ca 2071 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2072
ebc73a67 2073 /* Get the (next) batch of reply messages. */
19a26f82
MK
2074 err = netlink_rcv(&nlh, answer);
2075 if (err < 0)
2076 goto out;
2077
2078 recv_len = err;
2079 err = 0;
2080
ebc73a67 2081 /* Satisfy the typing for the netlink macros. */
06f976ca 2082 msg = answer->nlmsghdr;
19a26f82
MK
2083
2084 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2085 /* Stop reading if we see an error message. */
19a26f82 2086 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2087 struct nlmsgerr *errmsg =
2088 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2089 err = errmsg->error;
2090 goto out;
2091 }
2092
ebc73a67 2093 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2094 if (msg->nlmsg_type == NLMSG_DONE) {
2095 readmore = 0;
2096 break;
2097 }
2098
2099 if (msg->nlmsg_type != RTM_NEWADDR) {
2100 err = -1;
2101 goto out;
2102 }
2103
06f976ca
SZ
2104 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2105 if (ifa->ifa_index == ifindex) {
2106 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2107 err = -1;
2108 goto out;
2109 }
2110
ebc73a67 2111 /* Found a result, stop searching. */
19a26f82
MK
2112 if (*res)
2113 goto out;
2114 }
2115
ebc73a67
CB
2116 /* Keep reading more data from the socket if the last
2117 * message had the NLF_F_MULTI flag set.
2118 */
19a26f82
MK
2119 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2120
ebc73a67 2121 /* Look at the next message received in this buffer. */
19a26f82
MK
2122 msg = NLMSG_NEXT(msg, recv_len);
2123 }
2124 } while (readmore);
2125
6ce39620
CB
2126#pragma GCC diagnostic pop
2127
19a26f82 2128 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2129 * error.
2130 */
19a26f82
MK
2131 err = -1;
2132
2133out:
2134 netlink_close(&nlh);
2135 nlmsg_free(answer);
2136 nlmsg_free(nlmsg);
2137 return err;
2138}
2139
/* Fetch an IPv6 address of the interface with index ifindex into *res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2144
/* Fetch an IPv4 address of the interface with index ifindex into *res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2149
f8fee0e2
MK
2150static int ip_gateway_add(int family, int ifindex, void *gw)
2151{
ebc73a67 2152 int addrlen, err;
f8fee0e2 2153 struct nl_handler nlh;
06f976ca 2154 struct rtmsg *rt;
ebc73a67 2155 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2156
ebc73a67
CB
2157 addrlen = family == AF_INET ? sizeof(struct in_addr)
2158 : sizeof(struct in6_addr);
f8fee0e2
MK
2159
2160 err = netlink_open(&nlh, NETLINK_ROUTE);
2161 if (err)
2162 return err;
2163
2164 err = -ENOMEM;
2165 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2166 if (!nlmsg)
2167 goto out;
2168
06f976ca 2169 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2170 if (!answer)
2171 goto out;
2172
06f976ca 2173 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2174 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2175 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2176
2177 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2178 if (!rt)
2179 goto out;
06f976ca
SZ
2180 rt->rtm_family = family;
2181 rt->rtm_table = RT_TABLE_MAIN;
2182 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2183 rt->rtm_protocol = RTPROT_BOOT;
2184 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2185 /* "default" destination */
06f976ca 2186 rt->rtm_dst_len = 0;
f8fee0e2
MK
2187
2188 err = -EINVAL;
a2f9a670 2189
2190 /* If gateway address not supplied, then a device route will be created instead */
2191 if (gw != NULL) {
2192 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2193 goto out;
2194 }
f8fee0e2
MK
2195
2196 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2197 * addresses for the gateway.
2198 */
f8fee0e2
MK
2199 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2200 goto out;
2201
2202 err = netlink_transaction(&nlh, nlmsg, answer);
2203out:
2204 netlink_close(&nlh);
2205 nlmsg_free(answer);
2206 nlmsg_free(nlmsg);
2207 return err;
2208}
2209
/* Add an IPv4 default route via gw through the interface with index ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2214
/* Add an IPv6 default route via gw through the interface with index ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/*
 * Decide whether bridge should be treated as an openvswitch bridge.
 *
 * The test is indirect: bridge is reported as an openvswitch bridge exactly
 * when stat() on /sys/class/net/<bridge>/bridge fails with ENOENT. A name too
 * long for the path buffer, a successful stat() and any other stat() error
 * all yield false.
 */
bool is_ovs_bridge(const char *bridge)
{
	char sysfs_dir[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(sysfs_dir, sizeof(sysfs_dir), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_dir))
		return false;

	if (stat(sysfs_dir, &st) >= 0)
		return false;

	return errno == ENOENT;
}
2236
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks through run_command(). */
struct ovs_veth_args {
	const char *bridge; /* bridge argument passed to ovs-vsctl */
	const char *nic;    /* port (network interface) argument passed to ovs-vsctl */
};
2241
cb0dc11b
CB
2242/* Called from a background thread - when nic goes away, remove it from the
2243 * bridge.
c43cbc04 2244 */
581c75e7 2245static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2246{
581c75e7 2247 struct ovs_veth_args *args = data;
cb0dc11b 2248
581c75e7
CB
2249 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2250 (char *)NULL);
2251 return -1;
c43cbc04
SH
2252}
2253
581c75e7 2254int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2255{
c43cbc04 2256 int ret;
419590da 2257 char cmd_output[PATH_MAX];
581c75e7 2258 struct ovs_veth_args args;
6ad22d06 2259
581c75e7
CB
2260 args.bridge = bridge;
2261 args.nic = nic;
2262 ret = run_command(cmd_output, sizeof(cmd_output),
2263 lxc_ovs_delete_port_exec, (void *)&args);
2264 if (ret < 0) {
2265 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2266 "%s", bridge, nic, cmd_output);
6ad22d06 2267 return -1;
581c75e7 2268 }
0d204771 2269
581c75e7
CB
2270 return 0;
2271}
ebc73a67 2272
581c75e7
CB
2273static int lxc_ovs_attach_bridge_exec(void *data)
2274{
2275 struct ovs_veth_args *args = data;
ebc73a67 2276
581c75e7
CB
2277 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2278 (char *)NULL);
2279 return -1;
2280}
ebc73a67 2281
581c75e7
CB
2282static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2283{
2284 int ret;
419590da 2285 char cmd_output[PATH_MAX];
581c75e7 2286 struct ovs_veth_args args;
ebc73a67 2287
581c75e7
CB
2288 args.bridge = bridge;
2289 args.nic = nic;
2290 ret = run_command(cmd_output, sizeof(cmd_output),
2291 lxc_ovs_attach_bridge_exec, (void *)&args);
2292 if (ret < 0) {
2293 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2294 bridge, nic, cmd_output);
2295 return -1;
c43cbc04 2296 }
0d204771 2297
581c75e7 2298 return 0;
0d204771 2299}
0d204771 2300
581c75e7 2301int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2302{
ebc73a67 2303 int err, fd, index;
9de31d5a 2304 size_t retlen;
0ad19a3f 2305 struct ifreq ifr;
2306
dae3fdf6 2307 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2308 return -EINVAL;
0ad19a3f 2309
2310 index = if_nametoindex(ifname);
2311 if (!index)
3cfc0f3a 2312 return -EINVAL;
0ad19a3f 2313
0d204771 2314 if (is_ovs_bridge(bridge))
581c75e7 2315 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2316
ad9429e5 2317 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2318 if (fd < 0)
3cfc0f3a 2319 return -errno;
0ad19a3f 2320
9de31d5a 2321 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2322 if (retlen >= IFNAMSIZ) {
2323 close(fd);
9de31d5a 2324 return -E2BIG;
42cc4083 2325 }
9de31d5a 2326
ebc73a67 2327 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2328 ifr.ifr_ifindex = index;
7d163508 2329 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2330 close(fd);
3cfc0f3a
MN
2331 if (err)
2332 err = -errno;
0ad19a3f 2333
2334 return err;
2335}
72d0e1cb 2336
ebc73a67 2337static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2338 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2339 [LXC_NET_VETH] = "veth",
2340 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2341 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2342 [LXC_NET_PHYS] = "phys",
b343592b
BP
2343 [LXC_NET_VLAN] = "vlan",
2344 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2345};
2346
2347const char *lxc_net_type_to_str(int type)
2348{
2349 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2350 return NULL;
ebc73a67 2351
72d0e1cb
SG
2352 return lxc_network_types[type];
2353}
8befa924 2354
ebc73a67 2355static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2356
966e9f1f 2357char *lxc_mkifname(char *template)
a0265685 2358{
2d7bf744 2359 int ret;
b1e44ed1 2360 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2361 char name[IFNAMSIZ];
2362 bool exists = false;
2363 size_t i = 0;
280cc35f 2364#ifdef HAVE_RAND_R
2365 unsigned int seed;
2366
2367 seed = randseed(false);
2368#else
2369
2370 (void)randseed(true);
2371#endif
a0265685 2372
535e8859
CB
2373 if (strlen(template) >= IFNAMSIZ)
2374 return NULL;
2375
ebc73a67 2376 /* Get all the network interfaces. */
b1e44ed1 2377 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2378 if (ret < 0) {
6d1400b5 2379 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2380 return NULL;
2381 }
a0265685 2382
ebc73a67 2383 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2384 for (;;) {
966e9f1f 2385 name[0] = '\0';
94b1cade 2386 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2387
966e9f1f 2388 exists = false;
280cc35f 2389
a0265685
SG
2390 for (i = 0; i < strlen(name); i++) {
2391 if (name[i] == 'X') {
2392#ifdef HAVE_RAND_R
8523344a 2393 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2394#else
8523344a 2395 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2396#endif
2397 }
2398 }
2399
2400 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2401 if (!strcmp(ifa->ifa_name, name)) {
2402 exists = true;
a0265685
SG
2403 break;
2404 }
2405 }
2406
966e9f1f 2407 if (!exists)
a0265685 2408 break;
a0265685
SG
2409 }
2410
b1e44ed1 2411 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2412 (void)strlcpy(template, name, strlen(template) + 1);
2413
2414 return template;
a0265685
SG
2415}
2416
8befa924
SH
2417int setup_private_host_hw_addr(char *veth1)
2418{
ebc73a67 2419 int err, sockfd;
8befa924 2420 struct ifreq ifr;
8befa924 2421
ad9429e5 2422 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2423 if (sockfd < 0)
2424 return -errno;
2425
ebc73a67 2426 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
87c6e5db
DJ
2427 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2428 close(sockfd);
ebc73a67 2429 return -E2BIG;
87c6e5db 2430 }
ebc73a67 2431
8befa924
SH
2432 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2433 if (err < 0) {
8befa924 2434 close(sockfd);
8befa924
SH
2435 return -errno;
2436 }
2437
2438 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2439 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 2440 close(sockfd);
8befa924
SH
2441 if (err < 0)
2442 return -errno;
2443
2444 return 0;
2445}
811ef482
CB
2446
2447int lxc_find_gateway_addresses(struct lxc_handler *handler)
2448{
2449 struct lxc_list *network = &handler->conf->network;
2450 struct lxc_list *iterator;
2451 struct lxc_netdev *netdev;
2452 int link_index;
2453
2454 lxc_list_for_each(iterator, network) {
2455 netdev = iterator->elem;
2456
2457 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2458 continue;
2459
2460 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2461 ERROR("Automatic gateway detection is only supported "
2462 "for veth and macvlan");
2463 return -1;
2464 }
2465
de4855a8 2466 if (netdev->link[0] == '\0') {
811ef482
CB
2467 ERROR("Automatic gateway detection needs a link interface");
2468 return -1;
2469 }
2470
2471 link_index = if_nametoindex(netdev->link);
2472 if (!link_index)
2473 return -EINVAL;
2474
2475 if (netdev->ipv4_gateway_auto) {
2476 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2477 ERROR("Failed to automatically find ipv4 gateway "
2478 "address from link interface \"%s\"", netdev->link);
2479 return -1;
2480 }
2481 }
2482
2483 if (netdev->ipv6_gateway_auto) {
2484 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2485 ERROR("Failed to automatically find ipv6 gateway "
2486 "address from link interface \"%s\"", netdev->link);
2487 return -1;
2488 }
2489 }
2490 }
2491
2492 return 0;
2493}
2494
2495#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2496static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2497 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2498{
2499 int ret;
2500 pid_t child;
2501 int bytes, pipefd[2];
2502 char *token, *saveptr = NULL;
095ead80 2503 char netdev_link[IFNAMSIZ];
419590da 2504 char buffer[PATH_MAX] = {0};
94b1cade 2505 size_t retlen;
811ef482
CB
2506
2507 if (netdev->type != LXC_NET_VETH) {
2508 ERROR("Network type %d not support for unprivileged use", netdev->type);
2509 return -1;
2510 }
2511
2512 ret = pipe(pipefd);
2513 if (ret < 0) {
2514 SYSERROR("Failed to create pipe");
2515 return -1;
2516 }
2517
2518 child = fork();
2519 if (child < 0) {
2520 SYSERROR("Failed to create new process");
2521 close(pipefd[0]);
2522 close(pipefd[1]);
2523 return -1;
2524 }
2525
2526 if (child == 0) {
8335fd40 2527 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2528
2529 close(pipefd[0]);
2530
2531 ret = dup2(pipefd[1], STDOUT_FILENO);
2532 if (ret >= 0)
2533 ret = dup2(pipefd[1], STDERR_FILENO);
2534 close(pipefd[1]);
2535 if (ret < 0) {
2536 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2537 _exit(EXIT_FAILURE);
811ef482
CB
2538 }
2539
de4855a8 2540 if (netdev->link[0] != '\0')
9de31d5a 2541 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2542 else
9de31d5a
CB
2543 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2544 if (retlen >= IFNAMSIZ) {
2545 SYSERROR("Invalid network device name");
2546 _exit(EXIT_FAILURE);
2547 }
811ef482 2548
8335fd40
CB
2549 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2550 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2551 _exit(EXIT_FAILURE);
8335fd40 2552 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2553
2554 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2555 lxcname, pidstr, netdev_link,
de4855a8
CB
2556 netdev->name[0] != '\0' ? netdev->name : "(null)");
2557 if (netdev->name[0] != '\0')
811ef482
CB
2558 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2559 lxcpath, lxcname, pidstr, "veth", netdev_link,
2560 netdev->name, (char *)NULL);
2561 else
2562 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2563 lxcpath, lxcname, pidstr, "veth", netdev_link,
2564 (char *)NULL);
2565 SYSERROR("Failed to execute lxc-user-nic");
78070056 2566 _exit(EXIT_FAILURE);
811ef482
CB
2567 }
2568
2569 /* close the write-end of the pipe */
2570 close(pipefd[1]);
2571
419590da 2572 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2573 if (bytes < 0) {
74c6e2b0 2574 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2575 close(pipefd[0]);
6b9f82a9
CB
2576 } else {
2577 buffer[bytes - 1] = '\0';
811ef482 2578 }
811ef482
CB
2579
2580 ret = wait_for_pid(child);
2581 close(pipefd[0]);
6b9f82a9 2582 if (ret != 0 || bytes < 0) {
811ef482
CB
2583 ERROR("lxc-user-nic failed to configure requested network: %s",
2584 buffer[0] != '\0' ? buffer : "(null)");
2585 return -1;
2586 }
2587 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2588
2589 /* netdev->name */
2590 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2591 if (!token) {
2592 ERROR("Failed to parse lxc-user-nic output");
811ef482 2593 return -1;
74c6e2b0 2594 }
811ef482 2595
e389f2af
CB
2596 /*
2597 * lxc-user-nic will take care of proper network device naming. So
2598 * netdev->name and netdev->created_name need to be identical to not
2599 * trigger another rename later on.
2600 */
2601 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2602 if (retlen < IFNAMSIZ)
2603 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2604 if (retlen >= IFNAMSIZ) {
2605 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2606 return -E2BIG;
2607 }
811ef482 2608
74c6e2b0 2609 /* netdev->ifindex */
811ef482 2610 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2611 if (!token) {
2612 ERROR("Failed to parse lxc-user-nic output");
811ef482 2613 return -1;
74c6e2b0 2614 }
811ef482 2615
74c6e2b0
CB
2616 ret = lxc_safe_int(token, &netdev->ifindex);
2617 if (ret < 0) {
6d1400b5 2618 errno = -ret;
2619 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2620 return -1;
2621 }
2622
74c6e2b0 2623 /* netdev->priv.veth_attr.veth1 */
811ef482 2624 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2625 if (!token) {
2626 ERROR("Failed to parse lxc-user-nic output");
811ef482 2627 return -1;
74c6e2b0 2628 }
811ef482 2629
94b1cade
DJ
2630 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2631 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2632 ERROR("Host side veth device name returned by lxc-user-nic is "
2633 "too long");
2634 return -E2BIG;
2635 }
74c6e2b0
CB
2636
2637 /* netdev->priv.veth_attr.ifindex */
2638 token = strtok_r(NULL, ":", &saveptr);
2639 if (!token) {
2640 ERROR("Failed to parse lxc-user-nic output");
2641 return -1;
2642 }
2643
2644 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2645 if (ret < 0) {
6d1400b5 2646 errno = -ret;
2647 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2648 return -1;
2649 }
2650
4d781681 2651 if (netdev->upscript) {
2652 char *argv[] = {
2653 "veth",
2654 netdev->link,
2655 netdev->priv.veth_attr.veth1,
2656 NULL,
2657 };
2658
e389f2af
CB
2659 ret = run_script_argv(lxcname, hooks_version, "net",
2660 netdev->upscript, "up", argv);
4d781681 2661 if (ret < 0)
2662 return -1;
2663 }
2664
811ef482
CB
2665 return 0;
2666}
2667
f0ecc19d 2668static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2669 struct lxc_netdev *netdev,
2670 const char *netns_path)
811ef482
CB
2671{
2672 int bytes, ret;
2673 pid_t child;
2674 int pipefd[2];
419590da 2675 char buffer[PATH_MAX] = {0};
811ef482
CB
2676
2677 if (netdev->type != LXC_NET_VETH) {
2678 ERROR("Network type %d not support for unprivileged use", netdev->type);
2679 return -1;
2680 }
2681
2682 ret = pipe(pipefd);
2683 if (ret < 0) {
2684 SYSERROR("Failed to create pipe");
2685 return -1;
2686 }
2687
2688 child = fork();
2689 if (child < 0) {
2690 SYSERROR("Failed to create new process");
2691 close(pipefd[0]);
2692 close(pipefd[1]);
2693 return -1;
2694 }
2695
2696 if (child == 0) {
8843fde4 2697 char *hostveth;
811ef482
CB
2698
2699 close(pipefd[0]);
2700
2701 ret = dup2(pipefd[1], STDOUT_FILENO);
2702 if (ret >= 0)
2703 ret = dup2(pipefd[1], STDERR_FILENO);
2704 close(pipefd[1]);
2705 if (ret < 0) {
2706 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2707 _exit(EXIT_FAILURE);
811ef482
CB
2708 }
2709
8843fde4
CB
2710 if (netdev->priv.veth_attr.pair[0] != '\0')
2711 hostveth = netdev->priv.veth_attr.pair;
2712 else
2713 hostveth = netdev->priv.veth_attr.veth1;
2714 if (hostveth[0] == '\0') {
74c6e2b0 2715 SYSERROR("Host side veth device name is missing");
a30b9023 2716 _exit(EXIT_FAILURE);
74c6e2b0
CB
2717 }
2718
de4855a8 2719 if (netdev->link[0] == '\0') {
811ef482 2720 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2721 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2722 _exit(EXIT_FAILURE);
74c6e2b0 2723 }
811ef482 2724
811ef482 2725 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2726 lxcname, netns_path, netdev->link, hostveth);
811ef482 2727 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2728 lxcname, netns_path, "veth", netdev->link, hostveth,
2729 (char *)NULL);
811ef482 2730 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2731 _exit(EXIT_FAILURE);
811ef482
CB
2732 }
2733
2734 close(pipefd[1]);
2735
419590da 2736 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2737 if (bytes < 0) {
2738 SYSERROR("Failed to read from pipe file descriptor.");
2739 close(pipefd[0]);
6b9f82a9
CB
2740 } else {
2741 buffer[bytes - 1] = '\0';
811ef482 2742 }
811ef482 2743
6b9f82a9
CB
2744 ret = wait_for_pid(child);
2745 close(pipefd[0]);
2746 if (ret != 0 || bytes < 0) {
811ef482
CB
2747 ERROR("lxc-user-nic failed to delete requested network: %s",
2748 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2749 return -1;
2750 }
2751
811ef482
CB
2752 return 0;
2753}
2754
1bd8d726
CB
2755bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2756{
2757 int ret;
2758 struct lxc_list *iterator;
2759 struct lxc_list *network = &handler->conf->network;
2760 /* strlen("/proc/") = 6
2761 * +
8335fd40 2762 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2763 * +
2764 * strlen("/fd/") = 4
2765 * +
8335fd40 2766 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2767 * +
2768 * \0
2769 */
8335fd40 2770 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2771
2772 *netns_path = '\0';
2773
28d9e29e 2774 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2775 DEBUG("Cannot not guarantee safe deletion of network devices. "
2776 "Manual cleanup maybe needed");
2777 return false;
2778 }
2779
2780 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2781 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2782 if (ret < 0 || ret >= sizeof(netns_path))
2783 return false;
2784
2785 lxc_list_for_each(iterator, network) {
2786 char *hostveth = NULL;
2787 struct lxc_netdev *netdev = iterator->elem;
2788
2789 /* We can only delete devices whose ifindex we have. If we don't
2790 * have the index it means that we didn't create it.
2791 */
2792 if (!netdev->ifindex)
2793 continue;
2794
2795 if (netdev->type == LXC_NET_PHYS) {
2796 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2797 netdev->link);
2798 if (ret < 0)
2799 WARN("Failed to rename interface with index %d "
2800 "to its initial name \"%s\"",
2801 netdev->ifindex, netdev->link);
2802 else
2803 TRACE("Renamed interface with index %d to its "
2804 "initial name \"%s\"",
2805 netdev->ifindex, netdev->link);
b3259dc6
TP
2806
2807 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2808 goto clear_ifindices;
1bd8d726
CB
2809 }
2810
2811 ret = netdev_deconf[netdev->type](handler, netdev);
2812 if (ret < 0)
2813 WARN("Failed to deconfigure network device");
2814
2815 if (netdev->type != LXC_NET_VETH)
66a7c406 2816 goto clear_ifindices;
1bd8d726 2817
c869be20 2818 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2819 goto clear_ifindices;
1bd8d726 2820
8843fde4
CB
2821 if (netdev->priv.veth_attr.pair[0] != '\0')
2822 hostveth = netdev->priv.veth_attr.pair;
2823 else
2824 hostveth = netdev->priv.veth_attr.veth1;
2825 if (hostveth[0] == '\0')
66a7c406 2826 goto clear_ifindices;
8843fde4 2827
1bd8d726
CB
2828 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2829 handler->name, netdev,
2830 netns_path);
2831 if (ret < 0) {
1bd8d726 2832 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2833 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2834 goto clear_ifindices;
1bd8d726
CB
2835 }
2836 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2837 netdev->link);
66a7c406
CB
2838
2839clear_ifindices:
ad2ddfcd 2840 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
2841 * have cached stale data which would cause it to fail on reboot
2842 * we're we don't re-read the on-disk config file.
2843 */
2844 netdev->ifindex = 0;
2845 if (netdev->type == LXC_NET_PHYS) {
2846 netdev->priv.phys_attr.ifindex = 0;
2847 } else if (netdev->type == LXC_NET_VETH) {
2848 netdev->priv.veth_attr.veth1[0] = '\0';
2849 netdev->priv.veth_attr.ifindex = 0;
2850 }
1bd8d726
CB
2851 }
2852
bb84beda 2853 return true;
1bd8d726
CB
2854}
2855
/* Argument bundle handed to the "ip neigh ... proxy" exec callbacks. */
struct ip_proxy_args {
	const char *ip;  /* address string placed after "proxy" */
	const char *dev; /* device name placed after "dev" */
};
2860
2861static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2862{
2863 struct ip_proxy_args *args = data;
2864
2865 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2866 return -1;
2867}
2868
2869static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2870{
2871 struct ip_proxy_args *args = data;
2872
2873 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2874 return -1;
2875}
2876
2877static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2878{
2879 int ret;
2880 char cmd_output[PATH_MAX];
2881 struct ip_proxy_args args = {
2882 .ip = ip,
2883 .dev = dev,
2884 };
2885
2886 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2887 if (ret < 0) {
2888 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2889 return -1;
2890 }
2891
2892 return 0;
2893}
2894
2895static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2896{
2897 int ret;
2898 char cmd_output[PATH_MAX];
2899 struct ip_proxy_args args = {
2900 .ip = ip,
2901 .dev = dev,
2902 };
2903
2904 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2905 if (ret < 0) {
2906 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2907 return -1;
2908 }
2909
2910 return 0;
2911}
2912
2913static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2914 struct lxc_list *cur, *next;
2915 struct lxc_inetdev *inet4dev;
2916 struct lxc_inet6dev *inet6dev;
2917 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2918 int err = 0;
2919 unsigned int lo_ifindex = 0;
6509154d 2920
2921 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2922 if (!lxc_list_empty(&netdev->ipv4)) {
2923 /* Check for net.ipv4.conf.[link].forwarding=1 */
2924 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2925 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2926 return minus_one_set_errno(EINVAL);
2927 }
2928 }
2929
2930 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2931 if (!lxc_list_empty(&netdev->ipv6)) {
2932 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2933 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2934 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2935 return minus_one_set_errno(EINVAL);
2936 }
2937
2938 /* Check for net.ipv6.conf.[link].forwarding=1 */
2939 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2940 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2941 return minus_one_set_errno(EINVAL);
2942 }
2943 }
2944
b670016a 2945 /* Perform IPVLAN specific checks. */
2946 if (netdev->type == LXC_NET_IPVLAN) {
2947 /* Check mode is l3s as other modes do not work with l2proxy. */
2948 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2949 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2950 return minus_one_set_errno(EINVAL);
2951 }
2952
2953 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 2954 lo_ifindex = if_nametoindex(loop_device);
b670016a 2955 if (lo_ifindex == 0) {
3ebffb98 2956 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 2957 return minus_one_set_errno(EINVAL);
2958 }
2959 }
2960
6509154d 2961 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2962 inet4dev = cur->elem;
2963 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2964 return minus_one_set_errno(-errno);
2965
2966 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2967 return minus_one_set_errno(EINVAL);
b670016a 2968
2969 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2970 if (netdev->type == LXC_NET_IPVLAN) {
2971 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2972 if (err < 0) {
3ebffb98 2973 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 2974 return minus_one_set_errno(-err);
2975 }
2976 }
6509154d 2977 }
2978
2979 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2980 inet6dev = cur->elem;
2981 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2982 return minus_one_set_errno(-errno);
2983
2984 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2985 return minus_one_set_errno(EINVAL);
b670016a 2986
2987 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2988 if (netdev->type == LXC_NET_IPVLAN) {
2989 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2990 if (err < 0) {
3ebffb98 2991 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 2992 return minus_one_set_errno(-err);
2993 }
2994 }
6509154d 2995 }
2996
2997 return 0;
2998}
2999
b670016a 3000static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
3001 char bufinet4[INET_ADDRSTRLEN];
3002 unsigned int errCount = 0;
3003
3004 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4))) {
3005 SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
3006 return minus_one_set_errno(EINVAL);
3007 }
3008
3009 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3010 if (lo_ifindex > 0) {
3011 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
3012 errCount++;
3013 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
3014 }
3015 }
3016
3017 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3018 if (link[0] != '\0') {
3019 if (lxc_del_ip_neigh_proxy(bufinet4, link) < 0)
3020 errCount++;
3021 }
3022
3023 if (errCount > 0)
3024 return minus_one_set_errno(EINVAL);
3025
3026 return 0;
3027}
3028
3029static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
3030 char bufinet6[INET6_ADDRSTRLEN];
3031 unsigned int errCount = 0;
3032
3033 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6))) {
3034 SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
3035 return minus_one_set_errno(EINVAL);
3036 }
3037
3038 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3039 if (lo_ifindex > 0) {
3040 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
3041 errCount++;
3042 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3043 }
3044 }
3045
3046 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3047 if (link[0] != '\0') {
3048 if (lxc_del_ip_neigh_proxy(bufinet6, link) < 0)
3049 errCount++;
3050 }
3051
3052 if (errCount > 0)
3053 return minus_one_set_errno(EINVAL);
3054
3055 return 0;
3056}
3057
6509154d 3058static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3059 unsigned int lo_ifindex = 0;
3060 unsigned int errCount = 0;
6509154d 3061 struct lxc_list *cur, *next;
3062 struct lxc_inetdev *inet4dev;
3063 struct lxc_inet6dev *inet6dev;
6509154d 3064
b670016a 3065 /* Perform IPVLAN specific checks. */
3066 if (netdev->type == LXC_NET_IPVLAN) {
3067 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3068 lo_ifindex = if_nametoindex(loop_device);
b670016a 3069 if (lo_ifindex == 0) {
3070 errCount++;
3ebffb98 3071 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3072 }
b670016a 3073 }
6509154d 3074
b670016a 3075 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3076 inet4dev = cur->elem;
3077 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3078 errCount++;
6509154d 3079 }
3080
3081 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3082 inet6dev = cur->elem;
b670016a 3083 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3084 errCount++;
6509154d 3085 }
3086
b670016a 3087 if (errCount > 0)
6509154d 3088 return minus_one_set_errno(EINVAL);
3089
3090 return 0;
3091}
3092
e389f2af 3093static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3094{
811ef482
CB
3095 struct lxc_list *iterator;
3096 struct lxc_list *network = &handler->conf->network;
3097
811ef482
CB
3098 lxc_list_for_each(iterator, network) {
3099 struct lxc_netdev *netdev = iterator->elem;
3100
3101 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3102 ERROR("Invalid network configuration type %d", netdev->type);
3103 return -1;
3104 }
3105
6509154d 3106 /* Setup l2proxy entries if enabled and used with a link property */
3107 if (netdev->l2proxy && netdev->link[0] != '\0') {
3108 if (lxc_setup_l2proxy(netdev)) {
3109 ERROR("Failed to setup l2proxy");
3110 return -1;
3111 }
3112 }
3113
811ef482
CB
3114 if (netdev_conf[netdev->type](handler, netdev)) {
3115 ERROR("Failed to create network device");
3116 return -1;
3117 }
811ef482
CB
3118 }
3119
3120 return 0;
3121}
3122
e389f2af 3123int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3124{
e389f2af
CB
3125 pid_t pid = handler->pid;
3126 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3127 struct lxc_list *iterator;
3128
e0010464 3129 if (am_guest_unpriv())
74c6e2b0 3130 return 0;
811ef482
CB
3131
3132 lxc_list_for_each(iterator, network) {
e389f2af
CB
3133 int ret;
3134 char ifname[IFNAMSIZ];
811ef482
CB
3135 struct lxc_netdev *netdev = iterator->elem;
3136
811ef482
CB
3137 if (!netdev->ifindex)
3138 continue;
3139
3140 /* retrieve the name of the interface */
3141 if (!if_indextoname(netdev->ifindex, ifname)) {
3142 ERROR("No interface corresponding to ifindex \"%d\"",
3143 netdev->ifindex);
3144 return -1;
3145 }
3146
535e8859
CB
3147 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3148 if (ret) {
6d1400b5 3149 errno = -ret;
e389f2af
CB
3150 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3151 ifname, pid);
811ef482
CB
3152 return -1;
3153 }
3154
e389f2af
CB
3155 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3156
3157 DEBUG("Moved network device \"%s\" to network namespace of %d",
3158 netdev->created_name, pid);
811ef482
CB
3159 }
3160
3161 return 0;
3162}
3163
e389f2af 3164static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3165{
e389f2af
CB
3166 int hooks_version = handler->conf->hooks_version;
3167 const char *lxcname = handler->name;
3168 const char *lxcpath = handler->lxcpath;
3169 struct lxc_list *network = &handler->conf->network;
3170 pid_t pid = handler->pid;
74c6e2b0
CB
3171 struct lxc_list *iterator;
3172
74c6e2b0
CB
3173 lxc_list_for_each(iterator, network) {
3174 struct lxc_netdev *netdev = iterator->elem;
3175
3176 if (netdev->type == LXC_NET_EMPTY)
3177 continue;
3178
3179 if (netdev->type == LXC_NET_NONE)
3180 continue;
3181
3182 if (netdev->type != LXC_NET_VETH) {
e389f2af 3183 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3184 lxc_net_type_to_str(netdev->type));
3185 return -1;
3186 }
3187
3188 if (netdev->mtu)
3189 INFO("mtu ignored due to insufficient privilege");
3190
e389f2af
CB
3191 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3192 pid, hooks_version))
74c6e2b0
CB
3193 return -1;
3194 }
3195
3196 return 0;
3197}
3198
1bd8d726 3199bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3200{
3201 int ret;
3202 struct lxc_list *iterator;
3203 struct lxc_list *network = &handler->conf->network;
1bd8d726 3204
811ef482
CB
3205 lxc_list_for_each(iterator, network) {
3206 char *hostveth = NULL;
3207 struct lxc_netdev *netdev = iterator->elem;
3208
3209 /* We can only delete devices whose ifindex we have. If we don't
3210 * have the index it means that we didn't create it.
3211 */
3212 if (!netdev->ifindex)
3213 continue;
3214
6509154d 3215 /* Delete l2proxy entries if enabled and used with a link property */
3216 if (netdev->l2proxy && netdev->link[0] != '\0') {
3217 if (lxc_delete_l2proxy(netdev))
3218 WARN("Failed to delete all l2proxy config");
3219 /* Don't return, let the network be cleaned up as normal. */
3220 }
3221
811ef482
CB
3222 if (netdev->type == LXC_NET_PHYS) {
3223 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3224 if (ret < 0)
3225 WARN("Failed to rename interface with index %d "
b809f232
CB
3226 "from \"%s\" to its initial name \"%s\"",
3227 netdev->ifindex, netdev->name, netdev->link);
0b154989 3228 else {
29589196
CB
3229 TRACE("Renamed interface with index %d from "
3230 "\"%s\" to its initial name \"%s\"",
3231 netdev->ifindex, netdev->name,
3232 netdev->link);
0b154989
TP
3233
3234 /* Restore original MTU */
3235 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3236 if (ret < 0) {
3237 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3238 netdev->link, netdev->priv.phys_attr.mtu);
3239 } else {
3240 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3241 netdev->link, netdev->priv.phys_attr.mtu);
3242 }
3243 }
b3259dc6
TP
3244
3245 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3246 goto clear_ifindices;
811ef482
CB
3247 }
3248
3249 ret = netdev_deconf[netdev->type](handler, netdev);
3250 if (ret < 0)
3251 WARN("Failed to deconfigure network device");
3252
3253 /* Recent kernels remove the virtual interfaces when the network
3254 * namespace is destroyed but in case we did not move the
3255 * interface to the network namespace, we have to destroy it.
3256 */
1bd8d726 3257 ret = lxc_netdev_delete_by_index(netdev->ifindex);
78ab281c
CB
3258 if (ret < 0) {
3259 if (errno != ENODEV) {
3260 WARN("Failed to remove interface \"%s\" with index %d",
3261 netdev->name[0] != '\0' ? netdev->name : "(null)",
3262 netdev->ifindex);
3263 goto clear_ifindices;
3264 }
3265 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
24548539
CB
3266 netdev->name[0] != '\0' ? netdev->name : "(null)",
3267 netdev->ifindex);
811ef482 3268 }
1bd8d726 3269 INFO("Removed interface \"%s\" with index %d",
52845118
CB
3270 netdev->name[0] != '\0' ? netdev->name : "(null)",
3271 netdev->ifindex);
811ef482
CB
3272
3273 if (netdev->type != LXC_NET_VETH)
66a7c406 3274 goto clear_ifindices;
811ef482 3275
811ef482
CB
3276 /* Explicitly delete host veth device to prevent lingering
3277 * devices. We had issues in LXD around this.
3278 */
de4855a8 3279 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3280 hostveth = netdev->priv.veth_attr.pair;
3281 else
3282 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3283 if (hostveth[0] == '\0')
66a7c406 3284 goto clear_ifindices;
811ef482
CB
3285
3286 ret = lxc_netdev_delete_by_name(hostveth);
3287 if (ret < 0) {
24548539
CB
3288 WARN("Failed to remove interface \"%s\" from \"%s\"",
3289 hostveth, netdev->link);
66a7c406 3290 goto clear_ifindices;
811ef482
CB
3291 }
3292 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3293
c869be20 3294 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3295 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3296 netdev->ifindex = 0;
3297 netdev->priv.veth_attr.ifindex = 0;
3298 goto clear_ifindices;
811ef482
CB
3299 }
3300
3301 /* Delete the openvswitch port. */
3302 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3303 if (ret < 0)
3304 WARN("Failed to remove port \"%s\" from openvswitch "
3305 "bridge \"%s\"", hostveth, netdev->link);
3306 else
3307 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3308 hostveth, netdev->link);
3309
66a7c406 3310clear_ifindices:
ad2ddfcd 3311 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3312 * have cached stale data which would cause it to fail on reboot
3313 * we're we don't re-read the on-disk config file.
3314 */
3315 netdev->ifindex = 0;
3316 if (netdev->type == LXC_NET_PHYS) {
3317 netdev->priv.phys_attr.ifindex = 0;
3318 } else if (netdev->type == LXC_NET_VETH) {
3319 netdev->priv.veth_attr.veth1[0] = '\0';
3320 netdev->priv.veth_attr.ifindex = 0;
3321 }
811ef482
CB
3322 }
3323
bb84beda 3324 return true;
811ef482
CB
3325}
3326
3327int lxc_requests_empty_network(struct lxc_handler *handler)
3328{
3329 struct lxc_list *network = &handler->conf->network;
3330 struct lxc_list *iterator;
3331 bool found_none = false, found_nic = false;
3332
3333 if (lxc_list_empty(network))
3334 return 0;
3335
3336 lxc_list_for_each(iterator, network) {
3337 struct lxc_netdev *netdev = iterator->elem;
3338
3339 if (netdev->type == LXC_NET_NONE)
3340 found_none = true;
3341 else
3342 found_nic = true;
3343 }
3344 if (found_none && !found_nic)
3345 return 1;
3346 return 0;
3347}
3348
3349/* try to move physical nics to the init netns */
b809f232 3350int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3351{
3352 int ret;
b809f232 3353 int oldfd;
811ef482 3354 char ifname[IFNAMSIZ];
b809f232 3355 struct lxc_list *iterator;
28d9e29e 3356 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3357 struct lxc_conf *conf = handler->conf;
811ef482 3358
b809f232
CB
3359 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3360 * the parent network namespace. We won't have this capability if we are
3361 * unprivileged.
3362 */
d0fbc7ba 3363 if (!handler->am_root)
b809f232 3364 return 0;
811ef482 3365
b809f232 3366 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3367
0037ab49 3368 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3369 if (oldfd < 0) {
3370 SYSERROR("Failed to preserve network namespace");
b809f232 3371 return -1;
811ef482
CB
3372 }
3373
b809f232 3374 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3375 if (ret < 0) {
3376 SYSERROR("Failed to enter network namespace");
3377 close(oldfd);
b809f232 3378 return -1;
811ef482
CB
3379 }
3380
b809f232
CB
3381 lxc_list_for_each(iterator, &conf->network) {
3382 struct lxc_netdev *netdev = iterator->elem;
811ef482 3383
b809f232
CB
3384 if (netdev->type != LXC_NET_PHYS)
3385 continue;
3386
3387 /* Retrieve the name of the interface in the container's network
3388 * namespace.
3389 */
3390 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3391 WARN("No interface corresponding to ifindex %d",
b809f232 3392 netdev->ifindex);
811ef482
CB
3393 continue;
3394 }
b809f232 3395
0037ab49 3396 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3397 if (ret < 0)
811ef482
CB
3398 WARN("Error moving network device \"%s\" back to "
3399 "network namespace", ifname);
b809f232
CB
3400 else
3401 TRACE("Moved network device \"%s\" back to network "
3402 "namespace", ifname);
811ef482 3403 }
811ef482 3404
b809f232 3405 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3406 close(oldfd);
b809f232
CB
3407 if (ret < 0) {
3408 SYSERROR("Failed to enter network namespace");
3409 return -1;
3410 }
3411
3412 return 0;
811ef482
CB
3413}
3414
3415static int setup_hw_addr(char *hwaddr, const char *ifname)
3416{
3417 struct sockaddr sockaddr;
3418 struct ifreq ifr;
6d1400b5 3419 int ret, fd;
811ef482
CB
3420
3421 ret = lxc_convert_mac(hwaddr, &sockaddr);
3422 if (ret) {
6d1400b5 3423 errno = -ret;
3424 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3425 return -1;
3426 }
3427
3428 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3429 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3430 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3431
ad9429e5 3432 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3433 if (fd < 0)
3434 return -1;
3435
3436 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3437 if (ret)
6d1400b5 3438 SYSERROR("Failed to perform ioctl");
3439
3440 close(fd);
811ef482
CB
3441
3442 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3443 ifr.ifr_name);
3444
3445 return ret;
3446}
3447
3448static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3449{
3450 struct lxc_list *iterator;
3451 int err;
3452
3453 lxc_list_for_each(iterator, ip) {
3454 struct lxc_inetdev *inetdev = iterator->elem;
3455
3456 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3457 &inetdev->bcast, inetdev->prefix);
3458 if (err) {
6d1400b5 3459 errno = -err;
3460 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3461 "with ifindex %d", ifindex);
811ef482
CB
3462 return -1;
3463 }
3464 }
3465
3466 return 0;
3467}
3468
3469static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3470{
3471 struct lxc_list *iterator;
3472 int err;
3473
3474 lxc_list_for_each(iterator, ip) {
3475 struct lxc_inet6dev *inet6dev = iterator->elem;
3476
3477 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3478 &inet6dev->mcast, &inet6dev->acast,
3479 inet6dev->prefix);
3480 if (err) {
6d1400b5 3481 errno = -err;
3482 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3483 "with ifindex %d", ifindex);
811ef482
CB
3484 return -1;
3485 }
3486 }
3487
3488 return 0;
3489}
3490
3491static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3492{
3493 char ifname[IFNAMSIZ];
3494 int err;
811ef482 3495 char *current_ifname = ifname;
009d6127 3496 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3497
3498 /* empty network namespace */
3499 if (!netdev->ifindex) {
3500 if (netdev->flags & IFF_UP) {
3501 err = lxc_netdev_up("lo");
3502 if (err) {
6d1400b5 3503 errno = -err;
3504 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3505 return -1;
3506 }
3507 }
3508
3509 if (netdev->type == LXC_NET_EMPTY)
3510 return 0;
3511
3512 if (netdev->type == LXC_NET_NONE)
3513 return 0;
3514
e389f2af
CB
3515 netdev->ifindex = if_nametoindex(netdev->created_name);
3516 if (!netdev->ifindex)
3517 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3518 netdev->name ?: "(null)");
811ef482
CB
3519 }
3520
3521 /* get the new ifindex in case of physical netdev */
3522 if (netdev->type == LXC_NET_PHYS) {
3523 netdev->ifindex = if_nametoindex(netdev->link);
3524 if (!netdev->ifindex) {
3525 ERROR("Failed to get ifindex for network device \"%s\"",
3526 netdev->link);
3527 return -1;
3528 }
3529 }
3530
3531 /* retrieve the name of the interface */
3532 if (!if_indextoname(netdev->ifindex, current_ifname)) {
e389f2af
CB
3533 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3534 netdev->ifindex);
811ef482
CB
3535 return -1;
3536 }
3537
e389f2af 3538 /* Default: let the system choose an interface name.
811ef482
CB
3539 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3540 * netlink will replace the format specifier with an appropriate index.
3541 */
de4855a8
CB
3542 if (netdev->name[0] == '\0') {
3543 if (netdev->type == LXC_NET_PHYS)
94b1cade 3544 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3545 else
94b1cade 3546 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3547 }
811ef482
CB
3548
3549 /* rename the interface name */
e389f2af
CB
3550 if (strcmp(current_ifname, netdev->name) != 0) {
3551 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
811ef482 3552 if (err) {
6d1400b5 3553 errno = -err;
3554 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
e389f2af 3555 current_ifname, netdev->name);
811ef482
CB
3556 return -1;
3557 }
e389f2af
CB
3558
3559 TRACE("Renamed network device from \"%s\" to \"%s\"",
3560 current_ifname, netdev->name);
811ef482
CB
3561 }
3562
3563 /* Re-read the name of the interface because its name has changed
3564 * and would be automatically allocated by the system
3565 */
3566 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3567 ERROR("Failed get name for network device with ifindex %d",
3568 netdev->ifindex);
3569 return -1;
3570 }
3571
790255cf
CB
3572 /* Now update the recorded name of the network device to reflect the
3573 * name of the network device in the child's network namespace. We will
3574 * later on send this information back to the parent.
3575 */
94b1cade 3576 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3577
811ef482
CB
3578 /* set a mac address */
3579 if (netdev->hwaddr) {
3580 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3581 ERROR("Failed to setup hw address for network device \"%s\"",
3582 current_ifname);
3583 return -1;
3584 }
3585 }
3586
3587 /* setup ipv4 addresses on the interface */
3588 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3589 ERROR("Failed to setup ip addresses for network device \"%s\"",
e389f2af 3590 current_ifname);
811ef482
CB
3591 return -1;
3592 }
3593
3594 /* setup ipv6 addresses on the interface */
3595 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3596 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
e389f2af 3597 current_ifname);
811ef482
CB
3598 return -1;
3599 }
3600
3601 /* set the network device up */
3602 if (netdev->flags & IFF_UP) {
811ef482
CB
3603 err = lxc_netdev_up(current_ifname);
3604 if (err) {
6d1400b5 3605 errno = -err;
3606 SYSERROR("Failed to set network device \"%s\" up",
3607 current_ifname);
811ef482
CB
3608 return -1;
3609 }
3610
3611 /* the network is up, make the loopback up too */
3612 err = lxc_netdev_up("lo");
3613 if (err) {
6d1400b5 3614 errno = -err;
3615 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3616 return -1;
3617 }
3618 }
3619
811ef482 3620 /* setup ipv4 gateway on the interface */
a2f9a670 3621 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3622 if (!(netdev->flags & IFF_UP)) {
3623 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3624 "\"%s\" when not bringing up the interface", current_ifname);
811ef482
CB
3625 return -1;
3626 }
3627
3628 if (lxc_list_empty(&netdev->ipv4)) {
3629 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3630 "\"%s\" when not assigning an address", current_ifname);
811ef482
CB
3631 return -1;
3632 }
3633
a2f9a670 3634 /* Setup device route if ipv4_gateway_dev is enabled */
3635 if (netdev->ipv4_gateway_dev) {
3636 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3637 if (err < 0) {
3638 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
e389f2af 3639 current_ifname);
a2f9a670 3640 return minus_one_set_errno(-err);
811ef482 3641 }
a2f9a670 3642 } else {
009d6127 3643 /* Check the gateway address is valid */
3644 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3645 return minus_one_set_errno(errno);
3646
3647 /* Try adding a default route to the gateway address */
811ef482 3648 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3649 if (err < 0) {
3650 /* If adding the default route fails, this could be because the
3651 * gateway address is in a different subnet to the container's address.
3652 * To work around this, we try adding a static device route to the
3653 * gateway address first, and then try again.
3654 */
a2f9a670 3655 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3656 if (err < 0) {
a2f9a670 3657 errno = -err;
009d6127 3658 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
e389f2af 3659 bufinet4, current_ifname);
009d6127 3660 return -1;
a2f9a670 3661 }
6d1400b5 3662
a2f9a670 3663 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3664 if (err < 0) {
a2f9a670 3665 errno = -err;
009d6127 3666 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
e389f2af 3667 bufinet4, current_ifname);
a2f9a670 3668 return -1;
811ef482 3669 }
811ef482
CB
3670 }
3671 }
3672 }
3673
3674 /* setup ipv6 gateway on the interface */
a2f9a670 3675 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3676 if (!(netdev->flags & IFF_UP)) {
e389f2af
CB
3677 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3678 current_ifname);
811ef482
CB
3679 return -1;
3680 }
3681
3682 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af
CB
3683 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3684 current_ifname);
811ef482
CB
3685 return -1;
3686 }
3687
a2f9a670 3688 /* Setup device route if ipv6_gateway_dev is enabled */
3689 if (netdev->ipv6_gateway_dev) {
3690 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3691 if (err < 0) {
3692 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
e389f2af 3693 current_ifname);
a2f9a670 3694 return minus_one_set_errno(-err);
811ef482 3695 }
a2f9a670 3696 } else {
009d6127 3697 /* Check the gateway address is valid */
3698 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3699 return minus_one_set_errno(errno);
3700
3701 /* Try adding a default route to the gateway address */
811ef482 3702 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3703 if (err < 0) {
3704 /* If adding the default route fails, this could be because the
3705 * gateway address is in a different subnet to the container's address.
3706 * To work around this, we try adding a static device route to the
3707 * gateway address first, and then try again.
3708 */
a2f9a670 3709 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3710 if (err < 0) {
a2f9a670 3711 errno = -err;
009d6127 3712 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
e389f2af 3713 bufinet6, current_ifname);
009d6127 3714 return -1;
a2f9a670 3715 }
6d1400b5 3716
a2f9a670 3717 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3718 if (err < 0) {
a2f9a670 3719 errno = -err;
009d6127 3720 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
e389f2af 3721 bufinet6, current_ifname);
a2f9a670 3722 return -1;
811ef482 3723 }
811ef482
CB
3724 }
3725 }
3726 }
3727
74c6e2b0 3728 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3729
3730 return 0;
3731}
3732
3733int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3734 struct lxc_list *network)
3735{
3736 struct lxc_list *iterator;
811ef482 3737
811ef482 3738 lxc_list_for_each(iterator, network) {
e389f2af 3739 struct lxc_netdev *netdev = iterator->elem;
811ef482 3740
811ef482 3741 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
e389f2af 3742 ERROR("Failed to setup netdev");
811ef482
CB
3743 return -1;
3744 }
3745 }
3746
3747 if (!lxc_list_empty(network))
e389f2af 3748 INFO("Network has been setup");
811ef482
CB
3749
3750 return 0;
3751}
7ab1ba02
CB
3752
3753int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3754{
3755 struct lxc_list *iterator;
3756 struct lxc_list *network = &handler->conf->network;
3757 int data_sock = handler->data_sock[0];
3758
7ab1ba02
CB
3759 lxc_list_for_each(iterator, network) {
3760 int ret;
3761 struct lxc_netdev *netdev = iterator->elem;
3762
3763 if (netdev->type != LXC_NET_VETH)
3764 continue;
3765
7fbb15ec 3766 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3767 if (ret < 0)
7ab1ba02 3768 return -1;
e389f2af
CB
3769
3770 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3771 if (ret < 0)
3772 return -1;
3773
3774 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3775 }
3776
3777 return 0;
3778}
3779
3780int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3781{
3782 struct lxc_list *iterator;
3783 struct lxc_list *network = &handler->conf->network;
3784 int data_sock = handler->data_sock[1];
3785
7ab1ba02
CB
3786 lxc_list_for_each(iterator, network) {
3787 int ret;
3788 struct lxc_netdev *netdev = iterator->elem;
3789
3790 if (netdev->type != LXC_NET_VETH)
3791 continue;
3792
e3233f26 3793 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3794 if (ret < 0)
7ab1ba02 3795 return -1;
e389f2af
CB
3796
3797 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3798 if (ret < 0)
3799 return -1;
3800 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3801 }
3802
3803 return 0;
3804}
a1ae535a
CB
3805
3806int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3807{
3808 struct lxc_list *iterator, *network;
3809 int data_sock = handler->data_sock[0];
3810
3811 if (!handler->am_root)
3812 return 0;
3813
3814 network = &handler->conf->network;
3815 lxc_list_for_each(iterator, network) {
3816 int ret;
3817 struct lxc_netdev *netdev = iterator->elem;
3818
3819 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3820 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3821 if (ret < 0)
7729f8e5 3822 return -1;
a1ae535a
CB
3823
3824 /* Send network device ifindex in the child's namespace to
3825 * parent.
3826 */
7fbb15ec 3827 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3828 if (ret < 0)
7729f8e5 3829 return -1;
a1ae535a
CB
3830 }
3831
e389f2af
CB
3832 if (!lxc_list_empty(network))
3833 TRACE("Sent network device names and ifindices to parent");
3834
a1ae535a 3835 return 0;
a1ae535a
CB
3836}
3837
3838int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3839{
3840 struct lxc_list *iterator, *network;
3841 int data_sock = handler->data_sock[1];
3842
3843 if (!handler->am_root)
3844 return 0;
3845
3846 network = &handler->conf->network;
3847 lxc_list_for_each(iterator, network) {
3848 int ret;
3849 struct lxc_netdev *netdev = iterator->elem;
3850
3851 /* Receive network device name in the child's namespace to
3852 * parent.
3853 */
e3233f26 3854 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3855 if (ret < 0)
7729f8e5 3856 return -1;
a1ae535a
CB
3857
3858 /* Receive network device ifindex in the child's namespace to
3859 * parent.
3860 */
e3233f26 3861 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3862 if (ret < 0)
7729f8e5 3863 return -1;
a1ae535a
CB
3864 }
3865
3866 return 0;
a1ae535a 3867}
bb84beda
CB
3868
3869void lxc_delete_network(struct lxc_handler *handler)
3870{
3871 bool bret;
3872
3873 if (handler->am_root)
3874 bret = lxc_delete_network_priv(handler);
3875 else
3876 bret = lxc_delete_network_unpriv(handler);
3877 if (!bret)
3878 DEBUG("Failed to delete network devices");
3879 else
3880 DEBUG("Deleted network devices");
3881}
1cd95214 3882
1cd95214
CB
3883int lxc_netns_set_nsid(int fd)
3884{
41a3300d 3885 int ret;
0ce60f0d
CB
3886 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3887 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3888 NLMSG_ALIGN(1024)];
1cd95214 3889 struct nl_handler nlh;
0ce60f0d
CB
3890 struct nlmsghdr *hdr;
3891 struct rtgenmsg *msg;
bfcedc7e 3892 int saved_errno;
9d036caa
CB
3893 const __s32 ns_id = -1;
3894 const __u32 netns_fd = fd;
1cd95214
CB
3895
3896 ret = netlink_open(&nlh, NETLINK_ROUTE);
3897 if (ret < 0)
41a3300d 3898 return -1;
1cd95214 3899
0ce60f0d 3900 memset(buf, 0, sizeof(buf));
6ce39620
CB
3901
3902#pragma GCC diagnostic push
3903#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3904 hdr = (struct nlmsghdr *)buf;
3905 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3906#pragma GCC diagnostic pop
1cd95214 3907
0ce60f0d
CB
3908 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3909 hdr->nlmsg_type = RTM_NEWNSID;
3910 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3911 hdr->nlmsg_pid = 0;
3912 hdr->nlmsg_seq = RTM_NEWNSID;
3913 msg->rtgen_family = AF_UNSPEC;
1cd95214 3914
9d036caa
CB
3915 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3916 if (ret < 0)
3917 goto on_error;
3918
3919 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3920 if (ret < 0)
3921 goto on_error;
1cd95214 3922
9fbbc427 3923 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3924
3925on_error:
bfcedc7e 3926 saved_errno = errno;
1cd95214 3927 netlink_close(&nlh);
bfcedc7e 3928 errno = saved_errno;
1cd95214 3929
9d036caa 3930 return ret;
1cd95214 3931}
938980ba
CB
3932
/*
 * Index a sequence of routing attributes by type.
 *
 * @tb:  table with room for @max + 1 entries; cleared first.
 * @max: highest attribute type to record.
 * @rta: first attribute of the sequence.
 * @len: number of bytes covered by the sequence.
 *
 * For every type <= @max the first attribute of that type is stored in
 * tb[type]; later duplicates and larger types are skipped.
 *
 * Returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	struct rtattr *attr;

	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (attr = rta; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
		unsigned short kind = attr->rta_type;

		if (kind > max)
			continue;

		if (tb[kind])
			continue;

		tb[kind] = attr;
	}
#pragma GCC diagnostic pop

	return 0;
}
3952
/*
 * Read the payload of @rta as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than dereferenced through a
 * cast pointer, so the read is well defined even when the message buffer is
 * a plain char array without 4-byte alignment. The caller must make sure the
 * attribute carries at least sizeof(__s32) bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3957
/* Locate the first routing attribute that follows a struct rtgenmsg at @r. */
#if !defined(NETNS_RTA)
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3962
3963int lxc_netns_get_nsid(int fd)
3964{
3965 int ret;
3966 ssize_t len;
3967 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3968 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3969 NLMSG_ALIGN(1024)];
938980ba
CB
3970 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3971 struct nl_handler nlh;
3972 struct nlmsghdr *hdr;
3973 struct rtgenmsg *msg;
3974 int saved_errno;
3975 __u32 netns_fd = fd;
3976
3977 ret = netlink_open(&nlh, NETLINK_ROUTE);
3978 if (ret < 0)
3979 return -1;
3980
3981 memset(buf, 0, sizeof(buf));
6ce39620
CB
3982
3983#pragma GCC diagnostic push
3984#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3985 hdr = (struct nlmsghdr *)buf;
3986 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3987#pragma GCC diagnostic pop
938980ba
CB
3988
3989 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3990 hdr->nlmsg_type = RTM_GETNSID;
3991 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3992 hdr->nlmsg_pid = 0;
3993 hdr->nlmsg_seq = RTM_GETNSID;
3994 msg->rtgen_family = AF_UNSPEC;
3995
9d036caa
CB
3996 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3997 if (ret == 0)
3998 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 3999
938980ba
CB
4000 saved_errno = errno;
4001 netlink_close(&nlh);
4002 errno = saved_errno;
4003 if (ret < 0)
4004 return -1;
4005
9d036caa 4006 errno = EINVAL;
938980ba
CB
4007 msg = NLMSG_DATA(hdr);
4008 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4009 if (len < 0)
4010 return -1;
4011
6ce39620
CB
4012#pragma GCC diagnostic push
4013#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4014 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4015 if (tb[__LXC_NETNSA_NSID])
4016 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4017#pragma GCC diagnostic pop
938980ba
CB
4018
4019 return -1;
4020}
e389f2af
CB
4021
4022int lxc_create_network(struct lxc_handler *handler)
4023{
4024 int ret;
4025
4026 /*
4027 * Find gateway addresses from the link device, which is no longer
4028 * accessible inside the container. Do this before creating network
4029 * interfaces, since goto out_delete_net does not work before
4030 * lxc_clone.
4031 */
4032 ret = lxc_find_gateway_addresses(handler);
4033 if (ret) {
4034 ERROR("Failed to find gateway addresses");
4035 return -1;
4036 }
4037
4038 if (handler->am_root) {
4039 ret = lxc_create_network_priv(handler);
4040 if (ret)
4041 return -1;
4042
4043 return lxc_network_move_created_netdev_priv(handler);
4044 }
4045
4046 return lxc_create_network_unpriv(handler);
4047}