]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: fix network device removal
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
d7b58715 39#include "raw_syscalls.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
ac2cecc4 47lxc_log_define(network, lxc);
f8fee0e2 48
811ef482 49typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
3ebffb98 50static const char loop_device[] = "lo";
811ef482 51
b670016a 52static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 53{
54 int addrlen, err;
55 struct nl_handler nlh;
56 struct rtmsg *rt;
57 struct nlmsg *answer = NULL, *nlmsg = NULL;
58
59 addrlen = family == AF_INET ? sizeof(struct in_addr)
60 : sizeof(struct in6_addr);
61
62 err = netlink_open(&nlh, NETLINK_ROUTE);
63 if (err)
64 return err;
65
66 err = -ENOMEM;
67 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
68 if (!nlmsg)
69 goto out;
70
71 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
72 if (!answer)
73 goto out;
74
75 nlmsg->nlmsghdr->nlmsg_flags =
76 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 77 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 78
79 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
80 if (!rt)
81 goto out;
82 rt->rtm_family = family;
83 rt->rtm_table = RT_TABLE_MAIN;
84 rt->rtm_scope = RT_SCOPE_LINK;
85 rt->rtm_protocol = RTPROT_BOOT;
86 rt->rtm_type = RTN_UNICAST;
87 rt->rtm_dst_len = netmask;
88
89 err = -EINVAL;
90 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
91 goto out;
92 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
93 goto out;
94 err = netlink_transaction(&nlh, nlmsg, answer);
95out:
96 netlink_close(&nlh);
97 nlmsg_free(answer);
98 nlmsg_free(nlmsg);
99 return err;
100}
101
/* Request an RTM_NEWROUTE for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
106
/* Request an RTM_NEWROUTE for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
111
/* Request an RTM_DELROUTE for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
116
/* Request an RTM_DELROUTE for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
121
d4a7da46 122static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
123{
124 struct lxc_list *iterator;
125 int err;
126
127 lxc_list_for_each(iterator, ip) {
128 struct lxc_inetdev *inetdev = iterator->elem;
129
130 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
131 if (err) {
132 SYSERROR("Failed to setup ipv4 route for network device "
133 "with ifindex %d", ifindex);
596a002c 134 return ret_set_errno(-1, -err);
d4a7da46 135 }
136 }
137
138 return 0;
139}
140
141static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
142{
143 struct lxc_list *iterator;
144 int err;
145
146 lxc_list_for_each(iterator, ip) {
147 struct lxc_inet6dev *inet6dev = iterator->elem;
148
149 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
150 if (err) {
151 SYSERROR("Failed to setup ipv6 route for network device "
152 "with ifindex %d", ifindex);
596a002c 153 return ret_set_errno(-1, -err);
d4a7da46 154 }
155 }
156
157 return 0;
158}
159
6dfa9581
TP
160static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
161{
162 struct lxc_list *iterator;
163 int err;
164
165 lxc_list_for_each(iterator, ip) {
166 struct lxc_inetdev *inetdev = iterator->elem;
167
168 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
169
170 if (err)
596a002c 171 return log_error_errno(-1, err,
6dfa9581
TP
172 "Failed to setup ipv4 address route for network device with eifindex %d",
173 ifindex);
174 }
175
176 return 0;
177}
178
179static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
180{
181 struct lxc_list *iterator;
182 int err;
183
184 lxc_list_for_each(iterator, ip) {
185 struct lxc_inet6dev *inet6dev = iterator->elem;
186
187 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
188 if (err)
596a002c 189 return log_error_errno(-1, err,
6dfa9581
TP
190 "Failed to setup ipv6 address route for network device with eifindex %d",
191 ifindex);
192 }
193
194 return 0;
195}
196
/* Argument bundle handed to the "ip neigh ... proxy" exec wrappers. */
struct ip_proxy_args {
	/* Placed on the command line right after "proxy". */
	const char *ip;
	/* Placed on the command line right after "dev". */
	const char *dev;
};
201
202static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
203{
204 struct ip_proxy_args *args = data;
205
206 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
207 return -1;
208}
209
210static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
211{
212 struct ip_proxy_args *args = data;
213
214 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
215 return -1;
216}
217
218static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
219{
220 int ret;
221 char cmd_output[PATH_MAX] = {0};
222 struct ip_proxy_args args = {
223 .ip = ip,
224 .dev = dev,
225 };
226
227 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
228 if (ret < 0) {
229 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
230 return -1;
231 }
232
233 return 0;
234}
235
236static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
237{
238 int ret;
239 char cmd_output[PATH_MAX] = {0};
240 struct ip_proxy_args args = {
241 .ip = ip,
242 .dev = dev,
243 };
244
245 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
246 if (ret < 0) {
247 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
248 return -1;
249 }
250
251 return 0;
252}
253
254static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
255{
256 int ret;
257 char path[PATH_MAX];
258 char buf[1] = "";
259
260 if (family != AF_INET && family != AF_INET6)
596a002c 261 return ret_set_errno(-1, EINVAL);
6dfa9581
TP
262
263 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
264 family == AF_INET ? "ipv4" : "ipv6", ifname,
265 "forwarding");
266 if (ret < 0 || (size_t)ret >= PATH_MAX)
596a002c 267 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
268
269 return lxc_read_file_expect(path, buf, 1, "1");
270}
271
811ef482
CB
272static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
273{
54256301
CB
274 int err;
275 unsigned int mtu;
811ef482
CB
276 char *veth1, *veth2;
277 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 278
de4855a8 279 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
280 veth1 = netdev->priv.veth_attr.pair;
281 if (handler->conf->reboot)
282 lxc_netdev_delete_by_name(veth1);
283 } else {
284 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
285 if (err < 0 || (size_t)err >= sizeof(veth1buf))
286 return -1;
287
288 veth1 = lxc_mkifname(veth1buf);
289 if (!veth1)
290 return -1;
291
292 /* store away for deconf */
293 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
294 }
295
d34212ad
CB
296 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
297 if (err < 0 || (size_t)err >= sizeof(veth2buf))
298 return -1;
299
811ef482
CB
300 veth2 = lxc_mkifname(veth2buf);
301 if (!veth2)
54256301
CB
302 return -1;
303
304 if (netdev->mtu && lxc_safe_uint(netdev->mtu, &mtu)) {
305 return log_error_errno(-1, errno, "Failed to parse mtu");
306 } else if (netdev->link[0] != '\0') {
307 int ifindex_mtu;
811ef482 308
54256301
CB
309 ifindex_mtu = if_nametoindex(netdev->link);
310 if (ifindex_mtu) {
311 mtu = netdev_get_mtu(ifindex_mtu);
312 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
313 }
314 }
315
316 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
811ef482 317 if (err) {
6d1400b5 318 errno = -err;
319 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
54256301 320 return -1;
811ef482
CB
321 }
322
24190194
CB
323 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
324
811ef482
CB
325 /* changing the high byte of the mac address to 0xfe, the bridge interface
326 * will always keep the host's mac address and not take the mac address
327 * of a container */
328 err = setup_private_host_hw_addr(veth1);
329 if (err) {
6d1400b5 330 errno = -err;
331 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
332 goto out_delete;
333 }
334
8da62485
CB
335 /* Retrieve ifindex of the host's veth device. */
336 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
337 if (!netdev->priv.veth_attr.ifindex) {
338 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
339 goto out_delete;
340 }
341
811ef482
CB
342 if (mtu) {
343 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 344 if (err) {
6d1400b5 345 errno = -err;
54256301 346 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
347 goto out_delete;
348 }
349 }
350
6dfa9581 351 if (netdev->link[0] != '\0' && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
811ef482
CB
352 err = lxc_bridge_attach(netdev->link, veth1);
353 if (err) {
6d1400b5 354 errno = -err;
355 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
356 veth1, netdev->link);
811ef482
CB
357 goto out_delete;
358 }
359 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
360 }
361
362 err = lxc_netdev_up(veth1);
363 if (err) {
6d1400b5 364 errno = -err;
365 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
366 goto out_delete;
367 }
368
d4a7da46 369 /* setup ipv4 routes on the host interface */
370 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
371 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
372 goto out_delete;
373 }
374
375 /* setup ipv6 routes on the host interface */
376 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
377 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
378 goto out_delete;
379 }
380
6dfa9581
TP
381 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
382 if (netdev->ipv4_gateway) {
383 char bufinet4[INET_ADDRSTRLEN];
384 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
596a002c 385 log_error_errno(-1, -errno, "Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
386 goto out_delete;
387 }
388
389 err = lxc_ip_forwarding_on(veth1, AF_INET);
390 if (err) {
596a002c 391 log_error_errno(-1, err, "Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
392 goto out_delete;
393 }
394
395 err = lxc_add_ip_neigh_proxy(bufinet4, veth1);
396 if (err) {
596a002c 397 log_error_errno(-1, err, "Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
398 goto out_delete;
399 }
400 }
401
402 if (netdev->ipv6_gateway) {
403 char bufinet6[INET6_ADDRSTRLEN];
404
405 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
596a002c 406 log_error_errno(-1, -errno, "Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
407 goto out_delete;
408 }
409
410 /* Check for sysctl net.ipv6.conf.all.forwarding=1
411 Kernel requires this to route any packets for IPv6.
412 */
413 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
414 if (err) {
596a002c 415 log_error_errno(-1, err, "Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
416 goto out_delete;
417 }
418
419 err = lxc_ip_forwarding_on(veth1, AF_INET6);
420 if (err) {
596a002c 421 log_error_errno(-1, err, "Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
422 goto out_delete;
423 }
424
425 err = lxc_neigh_proxy_on(veth1, AF_INET6);
426 if (err) {
596a002c 427 log_error_errno(-1, err, "Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
428 goto out_delete;
429 }
430
431 err = lxc_add_ip_neigh_proxy(bufinet6, veth1);
432 if (err) {
596a002c 433 log_error_errno(-1, err, "Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
434 goto out_delete;
435 }
436 }
437
438 /* setup ipv4 address routes on the host interface */
439 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
440 if (err) {
596a002c 441 log_error_errno(-1, err, "Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
442 goto out_delete;
443 }
444
445 /* setup ipv6 address routes on the host interface */
446 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
447 if (err) {
596a002c 448 log_error_errno(-1, err, "Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
449 goto out_delete;
450 }
451 }
452
811ef482 453 if (netdev->upscript) {
14a7b0f9
CB
454 char *argv[] = {
455 "veth",
456 netdev->link,
990b9ac3 457 veth1,
14a7b0f9
CB
458 NULL,
459 };
460
461 err = run_script_argv(handler->name,
462 handler->conf->hooks_version, "net",
463 netdev->upscript, "up", argv);
464 if (err < 0)
811ef482
CB
465 goto out_delete;
466 }
467
54256301 468 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
469
470 return 0;
471
472out_delete:
54256301 473 lxc_netdev_delete_by_name(veth1);
811ef482
CB
474 return -1;
475}
476
477static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
478{
8021de25 479 char peer[IFNAMSIZ];
811ef482
CB
480 int err;
481
de4855a8 482 if (netdev->link[0] == '\0') {
811ef482
CB
483 ERROR("No link for macvlan network device specified");
484 return -1;
485 }
486
8021de25
CB
487 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
488 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
489 return -1;
490
8021de25 491 if (!lxc_mkifname(peer))
811ef482
CB
492 return -1;
493
494 err = lxc_macvlan_create(netdev->link, peer,
495 netdev->priv.macvlan_attr.mode);
496 if (err) {
6d1400b5 497 errno = -err;
498 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
499 peer, netdev->link);
966e9f1f 500 goto on_error;
811ef482
CB
501 }
502
a9704f05
CB
503 strlcpy(netdev->created_name, peer, IFNAMSIZ);
504
811ef482
CB
505 netdev->ifindex = if_nametoindex(peer);
506 if (!netdev->ifindex) {
507 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 508 goto on_error;
811ef482
CB
509 }
510
3bef7b7b 511 if (netdev->mtu) {
54256301
CB
512 unsigned int mtu;
513
3bef7b7b
TP
514 err = lxc_safe_uint(netdev->mtu, &mtu);
515 if (err < 0) {
516 errno = -err;
517 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
518 goto on_error;
519 }
520
521 err = lxc_netdev_set_mtu(peer, mtu);
522 if (err < 0) {
523 errno = -err;
524 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
525 goto on_error;
526 }
527 }
528
811ef482 529 if (netdev->upscript) {
14a7b0f9
CB
530 char *argv[] = {
531 "macvlan",
532 netdev->link,
533 NULL,
534 };
535
536 err = run_script_argv(handler->name,
537 handler->conf->hooks_version, "net",
538 netdev->upscript, "up", argv);
539 if (err < 0)
966e9f1f 540 goto on_error;
811ef482
CB
541 }
542
543 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
544 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
545
546 return 0;
966e9f1f
CB
547
548on_error:
811ef482 549 lxc_netdev_delete_by_name(peer);
811ef482
CB
550 return -1;
551}
552
c9f52382 553static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
554{
555 int err, index, len;
556 struct ifinfomsg *ifi;
557 struct nl_handler nlh;
558 struct rtattr *nest, *nest2;
559 struct nlmsg *answer = NULL, *nlmsg = NULL;
560
561 len = strlen(master);
562 if (len == 1 || len >= IFNAMSIZ)
596a002c 563 return ret_set_errno(-1, EINVAL);
c9f52382 564
565 len = strlen(name);
566 if (len == 1 || len >= IFNAMSIZ)
596a002c 567 return ret_set_errno(-1, EINVAL);
c9f52382 568
569 index = if_nametoindex(master);
570 if (!index)
596a002c 571 return ret_set_errno(-1, EINVAL);
c9f52382 572
573 err = netlink_open(&nlh, NETLINK_ROUTE);
574 if (err)
596a002c 575 return ret_set_errno(-1, -err);
c9f52382 576
577 err = -ENOMEM;
578 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
579 if (!nlmsg)
580 goto out;
581
582 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
583 if (!answer)
584 goto out;
585
586 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
587 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
588
589 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
590 if (!ifi) {
591 goto out;
592 }
593 ifi->ifi_family = AF_UNSPEC;
594
595 err = -EPROTO;
596 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
597 if (!nest)
598 goto out;
599
600 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
601 goto out;
602
603 if (mode) {
604 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
605 if (!nest2)
606 goto out;
607
608 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
609 goto out;
610
611 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
612 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
613 */
614 if (isolation > 0) {
615 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
616 goto out;
617 }
618
619 nla_end_nested(nlmsg, nest2);
620 }
621
622 nla_end_nested(nlmsg, nest);
623
624 if (nla_put_u32(nlmsg, IFLA_LINK, index))
625 goto out;
626
627 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
628 goto out;
629
630 err = netlink_transaction(&nlh, nlmsg, answer);
631out:
632 netlink_close(&nlh);
633 nlmsg_free(answer);
634 nlmsg_free(nlmsg);
635 if (err < 0)
596a002c 636 return ret_set_errno(-1, -err);
c9f52382 637 return 0;
638}
639
640static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
641{
dd119206 642 char peer[IFNAMSIZ];
c9f52382 643 int err;
644
645 if (netdev->link[0] == '\0') {
646 ERROR("No link for ipvlan network device specified");
647 return -1;
648 }
649
dd119206
CB
650 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
651 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 652 return -1;
653
dd119206 654 if (!lxc_mkifname(peer))
c9f52382 655 return -1;
656
dd119206
CB
657 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
658 netdev->priv.ipvlan_attr.isolation);
c9f52382 659 if (err) {
dd119206
CB
660 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
661 peer, netdev->link);
c9f52382 662 goto on_error;
663 }
664
e7fdd504
CB
665 strlcpy(netdev->created_name, peer, IFNAMSIZ);
666
c9f52382 667 netdev->ifindex = if_nametoindex(peer);
668 if (!netdev->ifindex) {
669 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
670 goto on_error;
671 }
672
006e135e 673 if (netdev->mtu) {
54256301
CB
674 unsigned int mtu;
675
006e135e 676 err = lxc_safe_uint(netdev->mtu, &mtu);
677 if (err < 0) {
678 errno = -err;
54256301 679 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 680 goto on_error;
681 }
682
683 err = lxc_netdev_set_mtu(peer, mtu);
684 if (err < 0) {
685 errno = -err;
54256301 686 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 687 goto on_error;
688 }
689 }
690
c9f52382 691 if (netdev->upscript) {
692 char *argv[] = {
693 "ipvlan",
694 netdev->link,
695 NULL,
696 };
697
dd119206
CB
698 err = run_script_argv(handler->name, handler->conf->hooks_version,
699 "net", netdev->upscript, "up", argv);
c9f52382 700 if (err < 0)
701 goto on_error;
702 }
703
dd119206
CB
704 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
705 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 706
707 return 0;
708
709on_error:
710 lxc_netdev_delete_by_name(peer);
711 return -1;
712}
713
811ef482
CB
714static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
715{
716 char peer[IFNAMSIZ];
717 int err;
718 static uint16_t vlan_cntr = 0;
811ef482 719
de4855a8 720 if (netdev->link[0] == '\0') {
811ef482
CB
721 ERROR("No link for vlan network device specified");
722 return -1;
723 }
724
d4d68410
CB
725 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
726 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
727 if (err < 0 || (size_t)err >= sizeof(peer))
728 return -1;
729
730 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
731 if (err) {
6d1400b5 732 errno = -err;
733 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
734 peer, netdev->link);
811ef482
CB
735 return -1;
736 }
737
83530dba
CB
738 strlcpy(netdev->created_name, peer, IFNAMSIZ);
739
811ef482
CB
740 netdev->ifindex = if_nametoindex(peer);
741 if (!netdev->ifindex) {
742 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 743 goto on_error;
744 }
745
746 if (netdev->mtu) {
54256301
CB
747 unsigned int mtu;
748
3e2a7b08 749 err = lxc_safe_uint(netdev->mtu, &mtu);
750 if (err < 0) {
751 errno = -err;
54256301 752 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 753 goto on_error;
754 }
755
756 err = lxc_netdev_set_mtu(peer, mtu);
54256301 757 if (err < 0) {
3e2a7b08 758 errno = -err;
54256301 759 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 760 goto on_error;
761 }
811ef482
CB
762 }
763
3a73d9f1 764 if (netdev->upscript) {
765 char *argv[] = {
766 "vlan",
767 netdev->link,
768 NULL,
769 };
770
d4d68410
CB
771 err = run_script_argv(handler->name, handler->conf->hooks_version,
772 "net", netdev->upscript, "up", argv);
19abca58 773 if (err < 0) {
3e2a7b08 774 goto on_error;
19abca58 775 }
3a73d9f1 776 }
777
d4d68410
CB
778 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
779 netdev->ifindex);
811ef482
CB
780
781 return 0;
3e2a7b08 782
783on_error:
784 lxc_netdev_delete_by_name(peer);
785 return -1;
811ef482
CB
786}
787
788static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
789{
0b154989 790 int err, mtu_orig = 0;
14a7b0f9 791
de4855a8 792 if (netdev->link[0] == '\0') {
811ef482
CB
793 ERROR("No link for physical interface specified");
794 return -1;
795 }
796
75b074ee
CB
797 /*
798 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
799 * network namespace because we need it to move the device from the
800 * host's network namespace to the container's network namespace later
801 * on.
802 * Note that netdev->link will contain the name of the physical network
803 * device in the host's namespace.
804 */
811ef482
CB
805 netdev->ifindex = if_nametoindex(netdev->link);
806 if (!netdev->ifindex) {
807 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
808 return -1;
809 }
810
61302ef7
CB
811 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
812
75b074ee
CB
813 /*
814 * Store the ifindex of the host's network device in the host's
790255cf
CB
815 * namespace.
816 */
817 netdev->priv.phys_attr.ifindex = netdev->ifindex;
818
75b074ee
CB
819 /*
820 * Get original device MTU setting and store for restoration after
821 * container shutdown.
822 */
0b154989
TP
823 mtu_orig = netdev_get_mtu(netdev->ifindex);
824 if (mtu_orig < 0) {
825 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
596a002c 826 return ret_set_errno(-1, -mtu_orig);
0b154989
TP
827 }
828
829 netdev->priv.phys_attr.mtu = mtu_orig;
830
3bef7b7b 831 if (netdev->mtu) {
54256301
CB
832 unsigned int mtu;
833
3bef7b7b
TP
834 err = lxc_safe_uint(netdev->mtu, &mtu);
835 if (err < 0) {
836 errno = -err;
75b074ee
CB
837 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
838 netdev->mtu, netdev->link);
3bef7b7b
TP
839 return -1;
840 }
14a7b0f9 841
3bef7b7b
TP
842 err = lxc_netdev_set_mtu(netdev->link, mtu);
843 if (err < 0) {
844 errno = -err;
54256301 845 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
846 return -1;
847 }
848 }
849
850 if (netdev->upscript) {
851 char *argv[] = {
852 "phys",
853 netdev->link,
854 NULL,
855 };
856
75b074ee
CB
857 err = run_script_argv(handler->name, handler->conf->hooks_version,
858 "net", netdev->upscript, "up", argv);
3bef7b7b
TP
859 if (err < 0) {
860 return -1;
861 }
862 }
863
75b074ee
CB
864 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
865 netdev->ifindex);
811ef482
CB
866
867 return 0;
868}
869
870static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
871{
14a7b0f9
CB
872 int ret;
873 char *argv[] = {
874 "empty",
875 NULL,
876 };
877
811ef482 878 netdev->ifindex = 0;
14a7b0f9
CB
879 if (!netdev->upscript)
880 return 0;
881
882 ret = run_script_argv(handler->name, handler->conf->hooks_version,
883 "net", netdev->upscript, "up", argv);
884 if (ret < 0)
885 return -1;
886
811ef482
CB
887 return 0;
888}
889
890static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
891{
892 netdev->ifindex = 0;
893 return 0;
894}
895
896static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
897 [LXC_NET_VETH] = instantiate_veth,
898 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 899 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
900 [LXC_NET_VLAN] = instantiate_vlan,
901 [LXC_NET_PHYS] = instantiate_phys,
902 [LXC_NET_EMPTY] = instantiate_empty,
903 [LXC_NET_NONE] = instantiate_none,
904};
905
906static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
907{
14a7b0f9
CB
908 int ret;
909 char *argv[] = {
910 "veth",
911 netdev->link,
912 NULL,
913 NULL,
914 };
915
916 if (!netdev->downscript)
917 return 0;
811ef482 918
de4855a8 919 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 920 argv[2] = netdev->priv.veth_attr.pair;
811ef482 921 else
14a7b0f9
CB
922 argv[2] = netdev->priv.veth_attr.veth1;
923
924 ret = run_script_argv(handler->name,
925 handler->conf->hooks_version, "net",
926 netdev->downscript, "down", argv);
927 if (ret < 0)
928 return -1;
811ef482 929
811ef482
CB
930 return 0;
931}
932
933static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
934{
14a7b0f9
CB
935 int ret;
936 char *argv[] = {
937 "macvlan",
938 netdev->link,
939 NULL,
940 };
941
942 if (!netdev->downscript)
943 return 0;
944
945 ret = run_script_argv(handler->name, handler->conf->hooks_version,
946 "net", netdev->downscript, "down", argv);
947 if (ret < 0)
948 return -1;
811ef482 949
811ef482
CB
950 return 0;
951}
952
c9f52382 953static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
954{
955 int ret;
956 char *argv[] = {
957 "ipvlan",
958 netdev->link,
959 NULL,
960 };
961
962 if (!netdev->downscript)
963 return 0;
964
965 ret = run_script_argv(handler->name, handler->conf->hooks_version,
966 "net", netdev->downscript, "down", argv);
967 if (ret < 0)
968 return -1;
969
970 return 0;
971}
972
811ef482
CB
973static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
974{
3a73d9f1 975 int ret;
976 char *argv[] = {
977 "vlan",
978 netdev->link,
979 NULL,
980 };
981
982 if (!netdev->downscript)
983 return 0;
984
985 ret = run_script_argv(handler->name, handler->conf->hooks_version,
986 "net", netdev->downscript, "down", argv);
987 if (ret < 0)
988 return -1;
989
811ef482
CB
990 return 0;
991}
992
993static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
994{
14a7b0f9
CB
995 int ret;
996 char *argv[] = {
997 "phys",
998 netdev->link,
999 NULL,
1000 };
1001
1002 if (!netdev->downscript)
1003 return 0;
1004
1005 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1006 "net", netdev->downscript, "down", argv);
1007 if (ret < 0)
1008 return -1;
811ef482 1009
811ef482
CB
1010 return 0;
1011}
1012
1013static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1014{
14a7b0f9
CB
1015 int ret;
1016 char *argv[] = {
1017 "empty",
1018 NULL,
1019 };
1020
1021 if (!netdev->downscript)
1022 return 0;
1023
1024 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1025 "net", netdev->downscript, "down", argv);
1026 if (ret < 0)
1027 return -1;
811ef482 1028
811ef482
CB
1029 return 0;
1030}
1031
/* "none" network type: nothing to tear down; always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
1036
1037static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1038 [LXC_NET_VETH] = shutdown_veth,
1039 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1040 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1041 [LXC_NET_VLAN] = shutdown_vlan,
1042 [LXC_NET_PHYS] = shutdown_phys,
1043 [LXC_NET_EMPTY] = shutdown_empty,
1044 [LXC_NET_NONE] = shutdown_none,
1045};
1046
0037ab49
TP
1047static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1048{
1049 int err;
1050 struct nl_handler nlh;
1051 struct ifinfomsg *ifi;
1052 struct nlmsg *nlmsg = NULL;
1053
1054 err = netlink_open(&nlh, NETLINK_ROUTE);
1055 if (err)
1056 return err;
1057
1058 err = -ENOMEM;
1059 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1060 if (!nlmsg)
1061 goto out;
1062
1063 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1064 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1065
1066 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1067 if (!ifi)
1068 goto out;
1069 ifi->ifi_family = AF_UNSPEC;
1070 ifi->ifi_index = ifindex;
1071
1072 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
1073 goto out;
1074
1075 if (ifname != NULL) {
1076 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1077 goto out;
1078 }
1079
1080 err = netlink_transaction(&nlh, nlmsg, nlmsg);
1081out:
1082 netlink_close(&nlh);
1083 nlmsg_free(nlmsg);
1084 return err;
1085}
1086
ebc73a67 1087int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1088{
ebc73a67 1089 int err;
0ad19a3f 1090 struct nl_handler nlh;
06f976ca 1091 struct ifinfomsg *ifi;
ebc73a67 1092 struct nlmsg *nlmsg = NULL;
0ad19a3f 1093
3cfc0f3a
MN
1094 err = netlink_open(&nlh, NETLINK_ROUTE);
1095 if (err)
1096 return err;
0ad19a3f 1097
3cfc0f3a 1098 err = -ENOMEM;
0ad19a3f 1099 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1100 if (!nlmsg)
1101 goto out;
1102
ebc73a67 1103 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1104 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1105
1106 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1107 if (!ifi)
1108 goto out;
06f976ca
SZ
1109 ifi->ifi_family = AF_UNSPEC;
1110 ifi->ifi_index = ifindex;
0ad19a3f 1111
1112 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
1113 goto out;
1114
8d357196
DY
1115 if (ifname != NULL) {
1116 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1117 goto out;
1118 }
1119
3cfc0f3a 1120 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 1121out:
1122 netlink_close(&nlh);
1123 nlmsg_free(nlmsg);
1124 return err;
1125}
1126
ebc73a67
CB
1127/* If we are asked to move a wireless interface, then we must actually move its
1128 * phyN device. Detect that condition and return the physname here. The physname
1129 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1130 */
1131#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1132char *is_wlan(const char *ifname)
e5848d39 1133{
b0293710 1134 __do_free char *path = NULL;
ebc73a67 1135 int i, ret;
e5848d39 1136 long physlen;
ebc73a67 1137 size_t len;
e5848d39 1138 FILE *f;
ebc73a67 1139 char *physname = NULL;
e5848d39 1140
ebc73a67 1141 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1142 path = must_realloc(NULL, len + 1);
e5848d39 1143 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1144 if (ret < 0 || (size_t)ret >= len)
e5848d39 1145 goto bad;
ebc73a67 1146
ebc73a67
CB
1147 f = fopen(path, "r");
1148 if (!f)
e5848d39 1149 goto bad;
ebc73a67 1150
1a0e70ac 1151 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1152 fseek(f, 0, SEEK_END);
1153 physlen = ftell(f);
1154 fseek(f, 0, SEEK_SET);
7d1cde93
SX
1155 if (physlen < 0) {
1156 fclose(f);
0382c0da 1157 goto bad;
7d1cde93 1158 }
ebc73a67
CB
1159
1160 physname = malloc(physlen + 1);
ee54ea9a 1161 if (!physname) {
acf47e1b 1162 fclose(f);
e5848d39 1163 goto bad;
ee54ea9a 1164 }
ebc73a67
CB
1165
1166 memset(physname, 0, physlen + 1);
e5848d39
SH
1167 ret = fread(physname, 1, physlen, f);
1168 fclose(f);
1169 if (ret < 0)
1170 goto bad;
1171
ebc73a67 1172 for (i = 0; i < physlen; i++) {
e5848d39
SH
1173 if (physname[i] == '\n')
1174 physname[i] = '\0';
ebc73a67 1175
e5848d39
SH
1176 if (physname[i] == '\0')
1177 break;
1178 }
1179
1180 return physname;
1181
1182bad:
f10fad2f 1183 free(physname);
e5848d39
SH
1184 return NULL;
1185}
1186
/* Rename interface @old to @new from inside the network namespace of @pid.
 *
 * The rename runs in a forked child that first calls
 * switch_to_ns(pid, "net"); the parent only waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child from here on. It must terminate with _exit() on every path:
	 * returning would let it continue executing the caller's code as a
	 * second copy of the parent process.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1204
e4103cf6 1205int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1206 const char *newname)
e5848d39 1207{
3dd78294 1208 __do_free char *cmd = NULL;
ebc73a67 1209 pid_t fpid;
e5848d39
SH
1210
1211 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1212 * However, IIUC this involves a bit more complicated work to talk to
1213 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1214 */
1215 cmd = on_path("iw", NULL);
3dd78294
CB
1216 if (!cmd) {
1217 return -1;
1218 }
e5848d39
SH
1219
1220 fpid = fork();
1221 if (fpid < 0)
3dd78294 1222 return -1;
ebc73a67 1223
e5848d39
SH
1224 if (fpid == 0) {
1225 char pidstr[30];
1226 sprintf(pidstr, "%d", pid);
ebc73a67
CB
1227 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
1228 (char *)NULL);
ebd582ae 1229 _exit(EXIT_FAILURE);
e5848d39 1230 }
ebc73a67 1231
e5848d39 1232 if (wait_for_pid(fpid))
3dd78294 1233 return -1;
e5848d39 1234
e5848d39 1235 if (newname)
3dd78294 1236 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1237
3dd78294 1238 return 0;
e5848d39
SH
1239}
1240
8d357196 1241int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1242{
3dd78294 1243 __do_free char *physname = NULL;
8befa924
SH
1244 int index;
1245
8befa924
SH
1246 if (!ifname)
1247 return -EINVAL;
1248
32571606 1249 index = if_nametoindex(ifname);
49428bf3
DY
1250 if (!index)
1251 return -EINVAL;
32571606 1252
ebc73a67
CB
1253 physname = is_wlan(ifname);
1254 if (physname)
e5848d39
SH
1255 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1256
8d357196 1257 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1258}
1259
b84f58b9 1260int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1261{
b84f58b9 1262 int err;
ebc73a67
CB
1263 struct ifinfomsg *ifi;
1264 struct nl_handler nlh;
1265 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1266
3cfc0f3a
MN
1267 err = netlink_open(&nlh, NETLINK_ROUTE);
1268 if (err)
1269 return err;
0ad19a3f 1270
3cfc0f3a 1271 err = -ENOMEM;
0ad19a3f 1272 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1273 if (!nlmsg)
1274 goto out;
1275
06f976ca 1276 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1277 if (!answer)
1278 goto out;
1279
ebc73a67 1280 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1281 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1282
1283 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1284 if (!ifi)
1285 goto out;
06f976ca
SZ
1286 ifi->ifi_family = AF_UNSPEC;
1287 ifi->ifi_index = ifindex;
0ad19a3f 1288
3cfc0f3a 1289 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1290out:
1291 netlink_close(&nlh);
1292 nlmsg_free(answer);
1293 nlmsg_free(nlmsg);
1294 return err;
1295}
1296
/* Resolve @name with if_nametoindex() and delete that interface.
 * Returns -EINVAL when the name cannot be resolved, otherwise the result of
 * lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1307
1308int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1309{
ebc73a67 1310 int err, len;
06f976ca 1311 struct ifinfomsg *ifi;
ebc73a67
CB
1312 struct nl_handler nlh;
1313 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1314
3cfc0f3a
MN
1315 err = netlink_open(&nlh, NETLINK_ROUTE);
1316 if (err)
1317 return err;
b9a5bb58 1318
b84f58b9 1319 len = strlen(newname);
90d79629
CB
1320 if (len == 1 || len >= IFNAMSIZ) {
1321 err = -EINVAL;
b84f58b9 1322 goto out;
90d79629 1323 }
b84f58b9 1324
3cfc0f3a 1325 err = -ENOMEM;
b9a5bb58
DL
1326 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1327 if (!nlmsg)
1328 goto out;
1329
06f976ca 1330 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1331 if (!answer)
1332 goto out;
1333
ebc73a67 1334 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1335 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1336
1337 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1338 if (!ifi)
1339 goto out;
06f976ca
SZ
1340 ifi->ifi_family = AF_UNSPEC;
1341 ifi->ifi_index = ifindex;
b84f58b9
DL
1342
1343 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1344 goto out;
b9a5bb58 1345
3cfc0f3a 1346 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1347out:
1348 netlink_close(&nlh);
1349 nlmsg_free(answer);
1350 nlmsg_free(nlmsg);
1351 return err;
1352}
1353
b84f58b9
DL
1354int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1355{
1356 int len, index;
1357
1358 len = strlen(oldname);
dae3fdf6 1359 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1360 return -EINVAL;
1361
1362 index = if_nametoindex(oldname);
1363 if (!index)
1364 return -EINVAL;
1365
1366 return lxc_netdev_rename_by_index(index, newname);
1367}
1368
8befa924 1369int netdev_set_flag(const char *name, int flag)
0ad19a3f 1370{
ebc73a67 1371 int err, index, len;
06f976ca 1372 struct ifinfomsg *ifi;
ebc73a67
CB
1373 struct nl_handler nlh;
1374 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1375
3cfc0f3a
MN
1376 err = netlink_open(&nlh, NETLINK_ROUTE);
1377 if (err)
1378 return err;
0ad19a3f 1379
3cfc0f3a 1380 err = -EINVAL;
0ad19a3f 1381 len = strlen(name);
dae3fdf6 1382 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1383 goto out;
1384
3cfc0f3a 1385 err = -ENOMEM;
0ad19a3f 1386 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1387 if (!nlmsg)
1388 goto out;
1389
06f976ca 1390 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1391 if (!answer)
1392 goto out;
1393
3cfc0f3a 1394 err = -EINVAL;
0ad19a3f 1395 index = if_nametoindex(name);
1396 if (!index)
1397 goto out;
1398
ebc73a67 1399 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1400 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1401
1402 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1403 if (!ifi) {
1404 err = -ENOMEM;
1405 goto out;
1406 }
06f976ca
SZ
1407 ifi->ifi_family = AF_UNSPEC;
1408 ifi->ifi_index = index;
1409 ifi->ifi_change |= IFF_UP;
1410 ifi->ifi_flags |= flag;
0ad19a3f 1411
1412 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1413out:
1414 netlink_close(&nlh);
1415 nlmsg_free(nlmsg);
1416 nlmsg_free(answer);
1417 return err;
1418}
1419
ebc73a67 1420int netdev_get_flag(const char *name, int *flag)
efa1cf45 1421{
ebc73a67 1422 int err, index, len;
a4318300 1423 struct ifinfomsg *ifi;
ebc73a67
CB
1424 struct nl_handler nlh;
1425 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1426
1427 if (!name)
1428 return -EINVAL;
1429
1430 err = netlink_open(&nlh, NETLINK_ROUTE);
1431 if (err)
1432 return err;
1433
1434 err = -EINVAL;
1435 len = strlen(name);
1436 if (len == 1 || len >= IFNAMSIZ)
1437 goto out;
1438
1439 err = -ENOMEM;
1440 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1441 if (!nlmsg)
1442 goto out;
1443
06f976ca 1444 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1445 if (!answer)
1446 goto out;
1447
1448 err = -EINVAL;
1449 index = if_nametoindex(name);
1450 if (!index)
1451 goto out;
1452
06f976ca
SZ
1453 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1454 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1455
1456 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1457 if (!ifi) {
1458 err = -ENOMEM;
1459 goto out;
1460 }
06f976ca
SZ
1461 ifi->ifi_family = AF_UNSPEC;
1462 ifi->ifi_index = index;
efa1cf45
DY
1463
1464 err = netlink_transaction(&nlh, nlmsg, answer);
1465 if (err)
1466 goto out;
1467
06f976ca 1468 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1469
1470 *flag = ifi->ifi_flags;
1471out:
1472 netlink_close(&nlh);
1473 nlmsg_free(nlmsg);
1474 nlmsg_free(answer);
1475 return err;
1476}
1477
1478/*
1479 * \brief Check a interface is up or not.
1480 *
1481 * \param name: name for the interface.
1482 *
1483 * \return int.
1484 * 0 means interface is down.
1485 * 1 means interface is up.
1486 * Others means error happened, and ret-value is the error number.
1487 */
ebc73a67 1488int lxc_netdev_isup(const char *name)
efa1cf45 1489{
ebc73a67 1490 int err, flag;
efa1cf45
DY
1491
1492 err = netdev_get_flag(name, &flag);
1493 if (err)
ebc73a67
CB
1494 return err;
1495
efa1cf45
DY
1496 if (flag & IFF_UP)
1497 return 1;
ebc73a67 1498
efa1cf45 1499 return 0;
efa1cf45
DY
1500}
1501
0130df54
SH
1502int netdev_get_mtu(int ifindex)
1503{
ebc73a67 1504 int answer_len, err, res;
0130df54 1505 struct nl_handler nlh;
06f976ca 1506 struct ifinfomsg *ifi;
0130df54 1507 struct nlmsghdr *msg;
ebc73a67
CB
1508 int readmore = 0, recv_len = 0;
1509 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1510
1511 err = netlink_open(&nlh, NETLINK_ROUTE);
1512 if (err)
1513 return err;
1514
1515 err = -ENOMEM;
1516 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1517 if (!nlmsg)
1518 goto out;
1519
06f976ca 1520 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1521 if (!answer)
1522 goto out;
1523
1524 /* Save the answer buffer length, since it will be overwritten
1525 * on the first receive (and we might need to receive more than
ebc73a67
CB
1526 * once.
1527 */
06f976ca
SZ
1528 answer_len = answer->nlmsghdr->nlmsg_len;
1529
ebc73a67 1530 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1531 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1532
06f976ca 1533 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1534 if (!ifi)
1535 goto out;
06f976ca 1536 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1537
1538 /* Send the request for addresses, which returns all addresses
1539 * on all interfaces. */
1540 err = netlink_send(&nlh, nlmsg);
1541 if (err < 0)
1542 goto out;
1543
6ce39620
CB
1544#pragma GCC diagnostic push
1545#pragma GCC diagnostic ignored "-Wcast-align"
1546
0130df54
SH
1547 do {
1548 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1549 * overwritten by a previous receive.
1550 */
06f976ca 1551 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1552
1553 /* Get the (next) batch of reply messages */
1554 err = netlink_rcv(&nlh, answer);
1555 if (err < 0)
1556 goto out;
1557
1558 recv_len = err;
0130df54
SH
1559
1560 /* Satisfy the typing for the netlink macros */
06f976ca 1561 msg = answer->nlmsghdr;
0130df54
SH
1562
1563 while (NLMSG_OK(msg, recv_len)) {
1564
1565 /* Stop reading if we see an error message */
1566 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1567 struct nlmsgerr *errmsg =
1568 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1569 err = errmsg->error;
1570 goto out;
1571 }
1572
1573 /* Stop reading if we see a NLMSG_DONE message */
1574 if (msg->nlmsg_type == NLMSG_DONE) {
1575 readmore = 0;
1576 break;
1577 }
1578
06f976ca 1579 ifi = NLMSG_DATA(msg);
0130df54
SH
1580 if (ifi->ifi_index == ifindex) {
1581 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1582 int attr_len =
1583 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1584 res = 0;
ebc73a67
CB
1585 while (RTA_OK(rta, attr_len)) {
1586 /* Found a local address for the
1587 * requested interface, return it.
1588 */
0130df54 1589 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1590 memcpy(&res, RTA_DATA(rta),
1591 sizeof(int));
0130df54
SH
1592 err = res;
1593 goto out;
1594 }
1595 rta = RTA_NEXT(rta, attr_len);
1596 }
0130df54
SH
1597 }
1598
ebc73a67
CB
1599 /* Keep reading more data from the socket if the last
1600 * message had the NLF_F_MULTI flag set.
1601 */
0130df54
SH
1602 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1603
ebc73a67 1604 /* Look at the next message received in this buffer. */
0130df54
SH
1605 msg = NLMSG_NEXT(msg, recv_len);
1606 }
1607 } while (readmore);
1608
6ce39620
CB
1609#pragma GCC diagnostic pop
1610
ebc73a67 1611 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1612 err = -1;
1613
1614out:
1615 netlink_close(&nlh);
1616 nlmsg_free(answer);
1617 nlmsg_free(nlmsg);
1618 return err;
1619}
1620
d472214b 1621int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1622{
54256301 1623 int err, len;
06f976ca 1624 struct ifinfomsg *ifi;
ebc73a67
CB
1625 struct nl_handler nlh;
1626 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1627
3cfc0f3a
MN
1628 err = netlink_open(&nlh, NETLINK_ROUTE);
1629 if (err)
1630 return err;
75d09f83 1631
3cfc0f3a 1632 err = -EINVAL;
75d09f83 1633 len = strlen(name);
dae3fdf6 1634 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1635 goto out;
1636
3cfc0f3a 1637 err = -ENOMEM;
75d09f83
DL
1638 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1639 if (!nlmsg)
1640 goto out;
1641
06f976ca 1642 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1643 if (!answer)
1644 goto out;
1645
ebc73a67 1646 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1647 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1648
1649 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1650 if (!ifi) {
1651 err = -ENOMEM;
1652 goto out;
1653 }
06f976ca 1654 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1655
1656 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1657 goto out;
75d09f83
DL
1658
1659 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1660 goto out;
1661
1662 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1663out:
1664 netlink_close(&nlh);
1665 nlmsg_free(nlmsg);
1666 nlmsg_free(answer);
1667 return err;
1668}
1669
d472214b 1670int lxc_netdev_up(const char *name)
0ad19a3f 1671{
d472214b 1672 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1673}
1674
/* Call netdev_set_flag() on @name with no flags and return its result. */
int lxc_netdev_down(const char *name)
{
	const int wanted_flags = 0;

	return netdev_set_flag(name, wanted_flags);
}
1679
54256301 1680int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1681{
ebc73a67 1682 int err, len;
06f976ca 1683 struct ifinfomsg *ifi;
ebc73a67 1684 struct nl_handler nlh;
0ad19a3f 1685 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1686 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1687
3cfc0f3a
MN
1688 err = netlink_open(&nlh, NETLINK_ROUTE);
1689 if (err)
1690 return err;
0ad19a3f 1691
3cfc0f3a 1692 err = -EINVAL;
0ad19a3f 1693 len = strlen(name1);
dae3fdf6 1694 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1695 goto out;
1696
1697 len = strlen(name2);
dae3fdf6 1698 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1699 goto out;
1700
3cfc0f3a 1701 err = -ENOMEM;
0ad19a3f 1702 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1703 if (!nlmsg)
1704 goto out;
1705
06f976ca 1706 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1707 if (!answer)
1708 goto out;
1709
06f976ca 1710 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1711 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1712 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1713
1714 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1715 if (!ifi)
1716 goto out;
06f976ca 1717 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1718
3cfc0f3a 1719 err = -EINVAL;
79e68309 1720 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1721 if (!nest1)
1722 goto out;
1723
1724 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1725 goto out;
1726
1727 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1728 if (!nest2)
1729 goto out;
1730
1731 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1732 if (!nest3)
1733 goto out;
1734
06f976ca 1735 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1736 if (!ifi) {
1737 err = -ENOMEM;
06f976ca 1738 goto out;
25a9939b 1739 }
0ad19a3f 1740
1741 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1742 goto out;
1743
54256301
CB
1744 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
1745 goto out;
1746
1747 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
1748 goto out;
1749
0ad19a3f 1750 nla_end_nested(nlmsg, nest3);
0ad19a3f 1751 nla_end_nested(nlmsg, nest2);
0ad19a3f 1752 nla_end_nested(nlmsg, nest1);
1753
1754 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1755 goto out;
1756
3cfc0f3a 1757 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1758out:
1759 netlink_close(&nlh);
1760 nlmsg_free(answer);
1761 nlmsg_free(nlmsg);
1762 return err;
1763}
1764
ebc73a67 1765/* TODO: merge with lxc_macvlan_create */
7c11d57a 1766int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1767{
ebc73a67 1768 int err, len, lindex;
06f976ca 1769 struct ifinfomsg *ifi;
ebc73a67 1770 struct nl_handler nlh;
26c39028 1771 struct rtattr *nest, *nest2;
ebc73a67 1772 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1773
3cfc0f3a
MN
1774 err = netlink_open(&nlh, NETLINK_ROUTE);
1775 if (err)
1776 return err;
26c39028 1777
3cfc0f3a 1778 err = -EINVAL;
26c39028 1779 len = strlen(master);
dae3fdf6 1780 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1781 goto err3;
1782
1783 len = strlen(name);
dae3fdf6 1784 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1785 goto err3;
1786
3cfc0f3a 1787 err = -ENOMEM;
26c39028
JHS
1788 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1789 if (!nlmsg)
1790 goto err3;
1791
06f976ca 1792 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1793 if (!answer)
1794 goto err2;
1795
3cfc0f3a 1796 err = -EINVAL;
26c39028
JHS
1797 lindex = if_nametoindex(master);
1798 if (!lindex)
1799 goto err1;
1800
06f976ca 1801 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1802 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1803 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1804
1805 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1806 if (!ifi) {
1807 err = -ENOMEM;
1808 goto err1;
1809 }
06f976ca 1810 ifi->ifi_family = AF_UNSPEC;
26c39028 1811
79e68309 1812 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1813 if (!nest)
1814 goto err1;
1815
1816 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1817 goto err1;
1818
1819 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1820 if (!nest2)
1821 goto err1;
e892973e 1822
26c39028
JHS
1823 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1824 goto err1;
e892973e 1825
26c39028 1826 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1827 nla_end_nested(nlmsg, nest);
1828
1829 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1830 goto err1;
1831
1832 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1833 goto err1;
1834
3cfc0f3a 1835 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1836err1:
1837 nlmsg_free(answer);
1838err2:
1839 nlmsg_free(nlmsg);
1840err3:
1841 netlink_close(&nlh);
1842 return err;
1843}
1844
e892973e 1845int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1846{
ebc73a67 1847 int err, index, len;
06f976ca 1848 struct ifinfomsg *ifi;
ebc73a67 1849 struct nl_handler nlh;
e892973e 1850 struct rtattr *nest, *nest2;
ebc73a67 1851 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1852
3cfc0f3a
MN
1853 err = netlink_open(&nlh, NETLINK_ROUTE);
1854 if (err)
1855 return err;
0ad19a3f 1856
3cfc0f3a 1857 err = -EINVAL;
0ad19a3f 1858 len = strlen(master);
dae3fdf6 1859 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1860 goto out;
1861
1862 len = strlen(name);
dae3fdf6 1863 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1864 goto out;
1865
3cfc0f3a 1866 err = -ENOMEM;
0ad19a3f 1867 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1868 if (!nlmsg)
1869 goto out;
1870
06f976ca 1871 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1872 if (!answer)
1873 goto out;
1874
3cfc0f3a 1875 err = -EINVAL;
0ad19a3f 1876 index = if_nametoindex(master);
1877 if (!index)
1878 goto out;
1879
06f976ca 1880 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1881 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1882 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1883
1884 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1885 if (!ifi) {
1886 err = -ENOMEM;
1887 goto out;
1888 }
06f976ca 1889 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1890
79e68309 1891 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1892 if (!nest)
1893 goto out;
1894
1895 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1896 goto out;
1897
e892973e
DL
1898 if (mode) {
1899 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1900 if (!nest2)
1901 goto out;
1902
1903 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1904 goto out;
1905
1906 nla_end_nested(nlmsg, nest2);
1907 }
1908
0ad19a3f 1909 nla_end_nested(nlmsg, nest);
1910
1911 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1912 goto out;
1913
1914 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1915 goto out;
1916
3cfc0f3a 1917 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1918out:
1919 netlink_close(&nlh);
1920 nlmsg_free(answer);
1921 nlmsg_free(nlmsg);
1922 return err;
1923}
1924
1925static int proc_sys_net_write(const char *path, const char *value)
1926{
ebc73a67
CB
1927 int fd;
1928 int err = 0;
0ad19a3f 1929
1930 fd = open(path, O_WRONLY);
1931 if (fd < 0)
1932 return -errno;
1933
f640cf46 1934 if (lxc_write_nointr(fd, value, strlen(value)) < 0)
0ad19a3f 1935 err = -errno;
1936
1937 close(fd);
1938 return err;
1939}
1940
/* Write "1" (non-zero @flag) or "0" to
 * /proc/sys/net/<ipv4|ipv6>/conf/<ifname>/forwarding.
 *
 * Returns -EINVAL if @family is neither AF_INET nor AF_INET6, -E2BIG if the
 * path does not fit in PATH_MAX, otherwise the result of
 * proc_sys_net_write().
 */
static int ip_forwarding_set(const char *ifname, int family, int flag)
{
	char path[PATH_MAX];
	const char *proto;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		break;
	case AF_INET6:
		proto = "ipv6";
		break;
	default:
		return -EINVAL;
	}

	len = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
		       proto, ifname, "forwarding");
	if (len < 0 || (size_t)len >= PATH_MAX)
		return -E2BIG;

	return proc_sys_net_write(path, flag ? "1" : "0");
}
1956
/* Call ip_forwarding_set() for @name and @family with the flag set to 1. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
1961
/* Call ip_forwarding_set() for @name and @family with the flag set to 0. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
1966
/* Write "1" (non-zero @flag) or "0" to
 * /proc/sys/net/ipv4/conf/<ifname>/proxy_arp for AF_INET, or to
 * /proc/sys/net/ipv6/conf/<ifname>/proxy_ndp for AF_INET6.
 *
 * Returns -EINVAL if @family is neither AF_INET nor AF_INET6, -E2BIG if the
 * path does not fit in PATH_MAX, otherwise the result of
 * proc_sys_net_write().
 */
static int neigh_proxy_set(const char *ifname, int family, int flag)
{
	char path[PATH_MAX];
	const char *proto, *knob;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		knob = "proxy_arp";
		break;
	case AF_INET6:
		proto = "ipv6";
		knob = "proxy_ndp";
		break;
	default:
		return -EINVAL;
	}

	len = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
		       proto, ifname, knob);
	if (len < 0 || (size_t)len >= PATH_MAX)
		return -E2BIG;

	return proc_sys_net_write(path, flag ? "1" : "0");
}
1983
/* Compare the proxy_arp (AF_INET) or proxy_ndp (AF_INET6) setting of @ifname
 * under /proc/sys/net against "1" using lxc_read_file_expect().
 *
 * Returns ret_set_errno(-1, EINVAL) if @family is neither AF_INET nor
 * AF_INET6, ret_set_errno(-1, E2BIG) if the path does not fit in PATH_MAX,
 * otherwise the result of lxc_read_file_expect().
 */
static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
{
	char path[PATH_MAX];
	char buf[1] = "";
	const char *proto, *knob;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		knob = "proxy_arp";
		break;
	case AF_INET6:
		proto = "ipv6";
		knob = "proxy_ndp";
		break;
	default:
		return ret_set_errno(-1, EINVAL);
	}

	len = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
		       proto, ifname, knob);
	if (len < 0 || (size_t)len >= PATH_MAX)
		return ret_set_errno(-1, E2BIG);

	return lxc_read_file_expect(path, buf, 1, "1");
}
2001
/* Call neigh_proxy_set() for @name and @family with the flag set to 1. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2006
/* Call neigh_proxy_set() for @name and @family with the flag set to 0. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2011
/* Return the value (0-15) of the hexadecimal digit @c, or -1 if @c is not a
 * hexadecimal digit.
 */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions are only defined for unsigned char values and
	 * EOF, so a possibly negative char must be converted first.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and the decoded bytes are stored in
 * sa_data. Each byte is written as one or two hexadecimal digits; bytes may
 * be separated by ':'. Parsing stops at the end of the string or after
 * ETH_ALEN bytes, whichever comes first, so a shorter address leaves the
 * remaining bytes of sa_data untouched and anything following the ETH_ALEN-th
 * byte is ignored.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned val;
		int digit;
		char c;

		/* First digit of the byte is mandatory. */
		digit = mac_hex_value(*macaddr++);
		if (digit < 0)
			return -EINVAL;
		val = (unsigned)digit;

		/* The second digit is optional: a ':' or the end of the
		 * string right after the first digit means a one-digit byte.
		 */
		c = *macaddr;
		digit = mac_hex_value(c);
		if (digit >= 0)
			val = (val << 4) | (unsigned)digit;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2056
ebc73a67
CB
2057static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2058 void *acast, int prefix)
0ad19a3f 2059{
ebc73a67 2060 int addrlen, err;
06f976ca 2061 struct ifaddrmsg *ifa;
ebc73a67
CB
2062 struct nl_handler nlh;
2063 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 2064
ebc73a67
CB
2065 addrlen = family == AF_INET ? sizeof(struct in_addr)
2066 : sizeof(struct in6_addr);
4bf1968d 2067
3cfc0f3a
MN
2068 err = netlink_open(&nlh, NETLINK_ROUTE);
2069 if (err)
2070 return err;
0ad19a3f 2071
3cfc0f3a 2072 err = -ENOMEM;
0ad19a3f 2073 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2074 if (!nlmsg)
2075 goto out;
2076
06f976ca 2077 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2078 if (!answer)
2079 goto out;
2080
06f976ca 2081 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2082 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2083 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2084
2085 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2086 if (!ifa)
25a9939b 2087 goto out;
06f976ca
SZ
2088 ifa->ifa_prefixlen = prefix;
2089 ifa->ifa_index = ifindex;
2090 ifa->ifa_family = family;
2091 ifa->ifa_scope = 0;
acf47e1b 2092
3cfc0f3a 2093 err = -EINVAL;
4bf1968d 2094 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 2095 goto out;
2096
4bf1968d 2097 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 2098 goto out;
2099
d8948a52 2100 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
2101 goto out;
2102
ebc73a67 2103 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 2104 err = -EPROTONOSUPPORT;
79881dc6
DL
2105 if (family == AF_INET6 &&
2106 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2107 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 2108 goto out;
0ad19a3f 2109
3cfc0f3a 2110 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 2111out:
2112 netlink_close(&nlh);
2113 nlmsg_free(answer);
2114 nlmsg_free(nlmsg);
2115 return err;
2116}
2117
/* Call ip_addr_add() with AF_INET6, passing @mcast in the broadcast slot and
 * @acast in the anycast slot, and return its result.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2124
/* Call ip_addr_add() with AF_INET and no anycast address (NULL), and return
 * its result.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2130
/* Extract an address from the RTM_NEWADDR message @msg, preferring IFA_LOCAL
 * over IFA_ADDRESS.
 *
 * If the message belongs to a different address family nothing is done. If
 * *res is NULL a buffer is allocated with malloc() and stored there (so res
 * should be an in_addr** or in6_addr**); an already set *res is overwritten
 * in place. The scan stops at the first IFA_LOCAL attribute.
 *
 * Returns 0 on success, including when no address attribute was present, and
 * -1 if an address attribute has an unexpected length or malloc() fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk over the attributes of this message. */
	for (attr = IFA_RTA(ifa); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * length, but verify it before copying anyway.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An earlier IFA_ADDRESS may already have filled the buffer;
		 * it is simply overwritten.
		 */
		if (*res == NULL) {
			*res = malloc(addrlen);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2184
19a26f82
MK
2185static int ip_addr_get(int family, int ifindex, void **res)
2186{
ebc73a67 2187 int answer_len, err;
06f976ca 2188 struct ifaddrmsg *ifa;
ebc73a67 2189 struct nl_handler nlh;
19a26f82 2190 struct nlmsghdr *msg;
ebc73a67
CB
2191 int readmore = 0, recv_len = 0;
2192 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2193
2194 err = netlink_open(&nlh, NETLINK_ROUTE);
2195 if (err)
2196 return err;
2197
2198 err = -ENOMEM;
2199 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2200 if (!nlmsg)
2201 goto out;
2202
06f976ca 2203 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2204 if (!answer)
2205 goto out;
2206
ebc73a67
CB
2207 /* Save the answer buffer length, since it will be overwritten on the
2208 * first receive (and we might need to receive more than once).
2209 */
06f976ca
SZ
2210 answer_len = answer->nlmsghdr->nlmsg_len;
2211
ebc73a67 2212 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2213 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2214
06f976ca 2215 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2216 if (!ifa)
2217 goto out;
06f976ca 2218 ifa->ifa_family = family;
19a26f82 2219
ebc73a67
CB
2220 /* Send the request for addresses, which returns all addresses on all
2221 * interfaces.
2222 */
19a26f82
MK
2223 err = netlink_send(&nlh, nlmsg);
2224 if (err < 0)
2225 goto out;
19a26f82 2226
6ce39620
CB
2227#pragma GCC diagnostic push
2228#pragma GCC diagnostic ignored "-Wcast-align"
2229
19a26f82
MK
2230 do {
2231 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2232 * overwritten by a previous receive.
2233 */
06f976ca 2234 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2235
ebc73a67 2236 /* Get the (next) batch of reply messages. */
19a26f82
MK
2237 err = netlink_rcv(&nlh, answer);
2238 if (err < 0)
2239 goto out;
2240
2241 recv_len = err;
2242 err = 0;
2243
ebc73a67 2244 /* Satisfy the typing for the netlink macros. */
06f976ca 2245 msg = answer->nlmsghdr;
19a26f82
MK
2246
2247 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2248 /* Stop reading if we see an error message. */
19a26f82 2249 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2250 struct nlmsgerr *errmsg =
2251 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2252 err = errmsg->error;
2253 goto out;
2254 }
2255
ebc73a67 2256 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2257 if (msg->nlmsg_type == NLMSG_DONE) {
2258 readmore = 0;
2259 break;
2260 }
2261
2262 if (msg->nlmsg_type != RTM_NEWADDR) {
2263 err = -1;
2264 goto out;
2265 }
2266
06f976ca
SZ
2267 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2268 if (ifa->ifa_index == ifindex) {
2269 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2270 err = -1;
2271 goto out;
2272 }
2273
ebc73a67 2274 /* Found a result, stop searching. */
19a26f82
MK
2275 if (*res)
2276 goto out;
2277 }
2278
ebc73a67
CB
2279 /* Keep reading more data from the socket if the last
2280 * message had the NLF_F_MULTI flag set.
2281 */
19a26f82
MK
2282 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2283
ebc73a67 2284 /* Look at the next message received in this buffer. */
19a26f82
MK
2285 msg = NLMSG_NEXT(msg, recv_len);
2286 }
2287 } while (readmore);
2288
6ce39620
CB
2289#pragma GCC diagnostic pop
2290
19a26f82 2291 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2292 * error.
2293 */
19a26f82
MK
2294 err = -1;
2295
2296out:
2297 netlink_close(&nlh);
2298 nlmsg_free(answer);
2299 nlmsg_free(nlmsg);
2300 return err;
2301}
2302
/* IPv6 front end of ip_addr_get(): the result is stored through res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2307
/* IPv4 front end of ip_addr_get(): the result is stored through res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2312
f8fee0e2
MK
2313static int ip_gateway_add(int family, int ifindex, void *gw)
2314{
ebc73a67 2315 int addrlen, err;
f8fee0e2 2316 struct nl_handler nlh;
06f976ca 2317 struct rtmsg *rt;
ebc73a67 2318 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2319
ebc73a67
CB
2320 addrlen = family == AF_INET ? sizeof(struct in_addr)
2321 : sizeof(struct in6_addr);
f8fee0e2
MK
2322
2323 err = netlink_open(&nlh, NETLINK_ROUTE);
2324 if (err)
2325 return err;
2326
2327 err = -ENOMEM;
2328 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2329 if (!nlmsg)
2330 goto out;
2331
06f976ca 2332 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2333 if (!answer)
2334 goto out;
2335
06f976ca 2336 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2337 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2338 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2339
2340 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2341 if (!rt)
2342 goto out;
06f976ca
SZ
2343 rt->rtm_family = family;
2344 rt->rtm_table = RT_TABLE_MAIN;
2345 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2346 rt->rtm_protocol = RTPROT_BOOT;
2347 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2348 /* "default" destination */
06f976ca 2349 rt->rtm_dst_len = 0;
f8fee0e2
MK
2350
2351 err = -EINVAL;
a2f9a670 2352
2353 /* If gateway address not supplied, then a device route will be created instead */
2354 if (gw != NULL) {
2355 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2356 goto out;
2357 }
f8fee0e2
MK
2358
2359 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2360 * addresses for the gateway.
2361 */
f8fee0e2
MK
2362 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2363 goto out;
2364
2365 err = netlink_transaction(&nlh, nlmsg, answer);
2366out:
2367 netlink_close(&nlh);
2368 nlmsg_free(answer);
2369 nlmsg_free(nlmsg);
2370 return err;
2371}
2372
/* Add an IPv4 default route through gw on the interface ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *addr = gw;

	return ip_gateway_add(AF_INET, ifindex, addr);
}
2377
/* Add an IPv6 default route through gw on the interface ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *addr = gw;

	return ip_gateway_add(AF_INET6, ifindex, addr);
}
/*
 * Report whether the named bridge is treated as an openvswitch bridge.
 *
 * The test is indirect: the answer is true exactly when stat() on
 * /sys/class/net/<bridge>/bridge fails with ENOENT. Any other outcome,
 * including a name too long for the path buffer, yields false.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	if (stat(path, &st) >= 0)
		return false;

	return errno == ENOENT;
}
2399
581c75e7
CB
/* Arguments passed to the ovs-vsctl exec callbacks. */
struct ovs_veth_args {
	/* Name of the openvswitch bridge the port belongs to. */
	const char *bridge;
	/* Name of the network device used as the port. */
	const char *nic;
};
2404
cb0dc11b
CB
2405/* Called from a background thread - when nic goes away, remove it from the
2406 * bridge.
c43cbc04 2407 */
581c75e7 2408static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2409{
581c75e7 2410 struct ovs_veth_args *args = data;
cb0dc11b 2411
581c75e7
CB
2412 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2413 (char *)NULL);
2414 return -1;
c43cbc04
SH
2415}
2416
581c75e7 2417int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2418{
c43cbc04 2419 int ret;
419590da 2420 char cmd_output[PATH_MAX];
581c75e7 2421 struct ovs_veth_args args;
6ad22d06 2422
581c75e7
CB
2423 args.bridge = bridge;
2424 args.nic = nic;
2425 ret = run_command(cmd_output, sizeof(cmd_output),
2426 lxc_ovs_delete_port_exec, (void *)&args);
2427 if (ret < 0) {
2428 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
53796b94 2429 "%s", nic, bridge, cmd_output);
6ad22d06 2430 return -1;
581c75e7 2431 }
0d204771 2432
581c75e7
CB
2433 return 0;
2434}
ebc73a67 2435
581c75e7
CB
2436static int lxc_ovs_attach_bridge_exec(void *data)
2437{
2438 struct ovs_veth_args *args = data;
ebc73a67 2439
581c75e7
CB
2440 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2441 (char *)NULL);
2442 return -1;
2443}
ebc73a67 2444
581c75e7
CB
2445static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2446{
2447 int ret;
419590da 2448 char cmd_output[PATH_MAX];
581c75e7 2449 struct ovs_veth_args args;
ebc73a67 2450
581c75e7
CB
2451 args.bridge = bridge;
2452 args.nic = nic;
2453 ret = run_command(cmd_output, sizeof(cmd_output),
2454 lxc_ovs_attach_bridge_exec, (void *)&args);
2455 if (ret < 0) {
2456 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
53796b94 2457 nic, bridge, cmd_output);
581c75e7 2458 return -1;
c43cbc04 2459 }
0d204771 2460
581c75e7 2461 return 0;
0d204771 2462}
0d204771 2463
581c75e7 2464int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2465{
ebc73a67 2466 int err, fd, index;
9de31d5a 2467 size_t retlen;
0ad19a3f 2468 struct ifreq ifr;
2469
dae3fdf6 2470 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2471 return -EINVAL;
0ad19a3f 2472
2473 index = if_nametoindex(ifname);
2474 if (!index)
3cfc0f3a 2475 return -EINVAL;
0ad19a3f 2476
0d204771 2477 if (is_ovs_bridge(bridge))
581c75e7 2478 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2479
ad9429e5 2480 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2481 if (fd < 0)
3cfc0f3a 2482 return -errno;
0ad19a3f 2483
9de31d5a 2484 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2485 if (retlen >= IFNAMSIZ) {
2486 close(fd);
9de31d5a 2487 return -E2BIG;
42cc4083 2488 }
9de31d5a 2489
ebc73a67 2490 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2491 ifr.ifr_ifindex = index;
7d163508 2492 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2493 close(fd);
3cfc0f3a
MN
2494 if (err)
2495 err = -errno;
0ad19a3f 2496
2497 return err;
2498}
72d0e1cb 2499
ebc73a67 2500static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2501 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2502 [LXC_NET_VETH] = "veth",
2503 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2504 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2505 [LXC_NET_PHYS] = "phys",
b343592b
BP
2506 [LXC_NET_VLAN] = "vlan",
2507 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2508};
2509
2510const char *lxc_net_type_to_str(int type)
2511{
2512 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2513 return NULL;
ebc73a67 2514
72d0e1cb
SG
2515 return lxc_network_types[type];
2516}
8befa924 2517
ebc73a67 2518static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2519
966e9f1f 2520char *lxc_mkifname(char *template)
a0265685 2521{
2d7bf744 2522 int ret;
b1e44ed1 2523 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2524 char name[IFNAMSIZ];
2525 bool exists = false;
2526 size_t i = 0;
280cc35f 2527#ifdef HAVE_RAND_R
2528 unsigned int seed;
2529
2530 seed = randseed(false);
2531#else
2532
2533 (void)randseed(true);
2534#endif
a0265685 2535
535e8859
CB
2536 if (strlen(template) >= IFNAMSIZ)
2537 return NULL;
2538
ebc73a67 2539 /* Get all the network interfaces. */
b1e44ed1 2540 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2541 if (ret < 0) {
6d1400b5 2542 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2543 return NULL;
2544 }
a0265685 2545
ebc73a67 2546 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2547 for (;;) {
966e9f1f 2548 name[0] = '\0';
94b1cade 2549 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2550
966e9f1f 2551 exists = false;
280cc35f 2552
a0265685
SG
2553 for (i = 0; i < strlen(name); i++) {
2554 if (name[i] == 'X') {
2555#ifdef HAVE_RAND_R
8523344a 2556 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2557#else
8523344a 2558 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2559#endif
2560 }
2561 }
2562
2563 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2564 if (!strcmp(ifa->ifa_name, name)) {
2565 exists = true;
a0265685
SG
2566 break;
2567 }
2568 }
2569
966e9f1f 2570 if (!exists)
a0265685 2571 break;
a0265685
SG
2572 }
2573
b1e44ed1 2574 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2575 (void)strlcpy(template, name, strlen(template) + 1);
2576
2577 return template;
a0265685
SG
2578}
2579
8befa924
SH
2580int setup_private_host_hw_addr(char *veth1)
2581{
ebc73a67 2582 int err, sockfd;
8befa924 2583 struct ifreq ifr;
8befa924 2584
ad9429e5 2585 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2586 if (sockfd < 0)
2587 return -errno;
2588
ebc73a67 2589 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
87c6e5db
DJ
2590 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2591 close(sockfd);
ebc73a67 2592 return -E2BIG;
87c6e5db 2593 }
ebc73a67 2594
8befa924
SH
2595 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2596 if (err < 0) {
8befa924 2597 close(sockfd);
8befa924
SH
2598 return -errno;
2599 }
2600
2601 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2602 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 2603 close(sockfd);
8befa924
SH
2604 if (err < 0)
2605 return -errno;
2606
2607 return 0;
2608}
811ef482
CB
2609
2610int lxc_find_gateway_addresses(struct lxc_handler *handler)
2611{
2612 struct lxc_list *network = &handler->conf->network;
2613 struct lxc_list *iterator;
2614 struct lxc_netdev *netdev;
2615 int link_index;
2616
2617 lxc_list_for_each(iterator, network) {
2618 netdev = iterator->elem;
2619
2620 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2621 continue;
2622
2623 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
03ca4af8 2624 ERROR("Automatic gateway detection is only supported for veth and macvlan");
811ef482
CB
2625 return -1;
2626 }
2627
de4855a8 2628 if (netdev->link[0] == '\0') {
811ef482
CB
2629 ERROR("Automatic gateway detection needs a link interface");
2630 return -1;
2631 }
2632
2633 link_index = if_nametoindex(netdev->link);
2634 if (!link_index)
2635 return -EINVAL;
2636
2637 if (netdev->ipv4_gateway_auto) {
2638 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
03ca4af8
TP
2639 ERROR("Failed to automatically find ipv4 gateway address from link interface \"%s\"",
2640 netdev->link);
811ef482
CB
2641 return -1;
2642 }
2643 }
2644
2645 if (netdev->ipv6_gateway_auto) {
2646 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
03ca4af8
TP
2647 ERROR("Failed to automatically find ipv6 gateway address from link interface \"%s\"",
2648 netdev->link);
811ef482
CB
2649 return -1;
2650 }
2651 }
2652 }
2653
2654 return 0;
2655}
2656
2657#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2658static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2659 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2660{
2661 int ret;
2662 pid_t child;
2663 int bytes, pipefd[2];
2664 char *token, *saveptr = NULL;
095ead80 2665 char netdev_link[IFNAMSIZ];
419590da 2666 char buffer[PATH_MAX] = {0};
94b1cade 2667 size_t retlen;
811ef482
CB
2668
2669 if (netdev->type != LXC_NET_VETH) {
2670 ERROR("Network type %d not support for unprivileged use", netdev->type);
2671 return -1;
2672 }
2673
2674 ret = pipe(pipefd);
2675 if (ret < 0) {
2676 SYSERROR("Failed to create pipe");
2677 return -1;
2678 }
2679
2680 child = fork();
2681 if (child < 0) {
2682 SYSERROR("Failed to create new process");
2683 close(pipefd[0]);
2684 close(pipefd[1]);
2685 return -1;
2686 }
2687
2688 if (child == 0) {
8335fd40 2689 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2690
2691 close(pipefd[0]);
2692
2693 ret = dup2(pipefd[1], STDOUT_FILENO);
2694 if (ret >= 0)
2695 ret = dup2(pipefd[1], STDERR_FILENO);
2696 close(pipefd[1]);
2697 if (ret < 0) {
2698 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2699 _exit(EXIT_FAILURE);
811ef482
CB
2700 }
2701
de4855a8 2702 if (netdev->link[0] != '\0')
9de31d5a 2703 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2704 else
9de31d5a
CB
2705 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2706 if (retlen >= IFNAMSIZ) {
2707 SYSERROR("Invalid network device name");
2708 _exit(EXIT_FAILURE);
2709 }
811ef482 2710
8335fd40
CB
2711 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2712 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2713 _exit(EXIT_FAILURE);
8335fd40 2714 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2715
2716 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2717 lxcname, pidstr, netdev_link,
de4855a8
CB
2718 netdev->name[0] != '\0' ? netdev->name : "(null)");
2719 if (netdev->name[0] != '\0')
811ef482
CB
2720 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2721 lxcpath, lxcname, pidstr, "veth", netdev_link,
2722 netdev->name, (char *)NULL);
2723 else
2724 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2725 lxcpath, lxcname, pidstr, "veth", netdev_link,
2726 (char *)NULL);
2727 SYSERROR("Failed to execute lxc-user-nic");
78070056 2728 _exit(EXIT_FAILURE);
811ef482
CB
2729 }
2730
2731 /* close the write-end of the pipe */
2732 close(pipefd[1]);
2733
419590da 2734 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2735 if (bytes < 0) {
74c6e2b0 2736 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2737 close(pipefd[0]);
6b9f82a9
CB
2738 } else {
2739 buffer[bytes - 1] = '\0';
811ef482 2740 }
811ef482
CB
2741
2742 ret = wait_for_pid(child);
2743 close(pipefd[0]);
6b9f82a9 2744 if (ret != 0 || bytes < 0) {
811ef482
CB
2745 ERROR("lxc-user-nic failed to configure requested network: %s",
2746 buffer[0] != '\0' ? buffer : "(null)");
2747 return -1;
2748 }
2749 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2750
2751 /* netdev->name */
2752 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2753 if (!token) {
2754 ERROR("Failed to parse lxc-user-nic output");
811ef482 2755 return -1;
74c6e2b0 2756 }
811ef482 2757
e389f2af
CB
2758 /*
2759 * lxc-user-nic will take care of proper network device naming. So
2760 * netdev->name and netdev->created_name need to be identical to not
2761 * trigger another rename later on.
2762 */
2763 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2764 if (retlen < IFNAMSIZ)
2765 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2766 if (retlen >= IFNAMSIZ) {
2767 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2768 return -E2BIG;
2769 }
811ef482 2770
74c6e2b0 2771 /* netdev->ifindex */
811ef482 2772 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2773 if (!token) {
2774 ERROR("Failed to parse lxc-user-nic output");
811ef482 2775 return -1;
74c6e2b0 2776 }
811ef482 2777
74c6e2b0
CB
2778 ret = lxc_safe_int(token, &netdev->ifindex);
2779 if (ret < 0) {
6d1400b5 2780 errno = -ret;
2781 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2782 return -1;
2783 }
2784
74c6e2b0 2785 /* netdev->priv.veth_attr.veth1 */
811ef482 2786 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2787 if (!token) {
2788 ERROR("Failed to parse lxc-user-nic output");
811ef482 2789 return -1;
74c6e2b0 2790 }
811ef482 2791
94b1cade
DJ
2792 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2793 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2794 ERROR("Host side veth device name returned by lxc-user-nic is "
2795 "too long");
2796 return -E2BIG;
2797 }
74c6e2b0
CB
2798
2799 /* netdev->priv.veth_attr.ifindex */
2800 token = strtok_r(NULL, ":", &saveptr);
2801 if (!token) {
2802 ERROR("Failed to parse lxc-user-nic output");
2803 return -1;
2804 }
2805
2806 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2807 if (ret < 0) {
6d1400b5 2808 errno = -ret;
2809 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2810 return -1;
2811 }
2812
4d781681 2813 if (netdev->upscript) {
2814 char *argv[] = {
2815 "veth",
2816 netdev->link,
2817 netdev->priv.veth_attr.veth1,
2818 NULL,
2819 };
2820
e389f2af
CB
2821 ret = run_script_argv(lxcname, hooks_version, "net",
2822 netdev->upscript, "up", argv);
4d781681 2823 if (ret < 0)
2824 return -1;
2825 }
2826
811ef482
CB
2827 return 0;
2828}
2829
f0ecc19d 2830static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2831 struct lxc_netdev *netdev,
2832 const char *netns_path)
811ef482
CB
2833{
2834 int bytes, ret;
2835 pid_t child;
2836 int pipefd[2];
419590da 2837 char buffer[PATH_MAX] = {0};
811ef482
CB
2838
2839 if (netdev->type != LXC_NET_VETH) {
2840 ERROR("Network type %d not support for unprivileged use", netdev->type);
2841 return -1;
2842 }
2843
2844 ret = pipe(pipefd);
2845 if (ret < 0) {
2846 SYSERROR("Failed to create pipe");
2847 return -1;
2848 }
2849
2850 child = fork();
2851 if (child < 0) {
2852 SYSERROR("Failed to create new process");
2853 close(pipefd[0]);
2854 close(pipefd[1]);
2855 return -1;
2856 }
2857
2858 if (child == 0) {
8843fde4 2859 char *hostveth;
811ef482
CB
2860
2861 close(pipefd[0]);
2862
2863 ret = dup2(pipefd[1], STDOUT_FILENO);
2864 if (ret >= 0)
2865 ret = dup2(pipefd[1], STDERR_FILENO);
2866 close(pipefd[1]);
2867 if (ret < 0) {
2868 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2869 _exit(EXIT_FAILURE);
811ef482
CB
2870 }
2871
8843fde4
CB
2872 if (netdev->priv.veth_attr.pair[0] != '\0')
2873 hostveth = netdev->priv.veth_attr.pair;
2874 else
2875 hostveth = netdev->priv.veth_attr.veth1;
2876 if (hostveth[0] == '\0') {
74c6e2b0 2877 SYSERROR("Host side veth device name is missing");
a30b9023 2878 _exit(EXIT_FAILURE);
74c6e2b0
CB
2879 }
2880
de4855a8 2881 if (netdev->link[0] == '\0') {
811ef482 2882 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2883 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2884 _exit(EXIT_FAILURE);
74c6e2b0 2885 }
811ef482 2886
811ef482 2887 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2888 lxcname, netns_path, netdev->link, hostveth);
811ef482 2889 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2890 lxcname, netns_path, "veth", netdev->link, hostveth,
2891 (char *)NULL);
811ef482 2892 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2893 _exit(EXIT_FAILURE);
811ef482
CB
2894 }
2895
2896 close(pipefd[1]);
2897
419590da 2898 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2899 if (bytes < 0) {
2900 SYSERROR("Failed to read from pipe file descriptor.");
2901 close(pipefd[0]);
6b9f82a9
CB
2902 } else {
2903 buffer[bytes - 1] = '\0';
811ef482 2904 }
811ef482 2905
6b9f82a9
CB
2906 ret = wait_for_pid(child);
2907 close(pipefd[0]);
2908 if (ret != 0 || bytes < 0) {
811ef482
CB
2909 ERROR("lxc-user-nic failed to delete requested network: %s",
2910 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2911 return -1;
2912 }
2913
811ef482
CB
2914 return 0;
2915}
2916
1bd8d726
CB
2917bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2918{
2919 int ret;
2920 struct lxc_list *iterator;
2921 struct lxc_list *network = &handler->conf->network;
2922 /* strlen("/proc/") = 6
2923 * +
8335fd40 2924 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2925 * +
2926 * strlen("/fd/") = 4
2927 * +
8335fd40 2928 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2929 * +
2930 * \0
2931 */
8335fd40 2932 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2933
2934 *netns_path = '\0';
2935
28d9e29e 2936 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2937 DEBUG("Cannot not guarantee safe deletion of network devices. "
2938 "Manual cleanup maybe needed");
2939 return false;
2940 }
2941
2942 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2943 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2944 if (ret < 0 || ret >= sizeof(netns_path))
2945 return false;
2946
2947 lxc_list_for_each(iterator, network) {
2948 char *hostveth = NULL;
2949 struct lxc_netdev *netdev = iterator->elem;
2950
2951 /* We can only delete devices whose ifindex we have. If we don't
2952 * have the index it means that we didn't create it.
2953 */
2954 if (!netdev->ifindex)
2955 continue;
2956
2957 if (netdev->type == LXC_NET_PHYS) {
2958 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2959 netdev->link);
2960 if (ret < 0)
2961 WARN("Failed to rename interface with index %d "
2962 "to its initial name \"%s\"",
2963 netdev->ifindex, netdev->link);
2964 else
2965 TRACE("Renamed interface with index %d to its "
2966 "initial name \"%s\"",
2967 netdev->ifindex, netdev->link);
b3259dc6
TP
2968
2969 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2970 goto clear_ifindices;
1bd8d726
CB
2971 }
2972
2973 ret = netdev_deconf[netdev->type](handler, netdev);
2974 if (ret < 0)
2975 WARN("Failed to deconfigure network device");
2976
2977 if (netdev->type != LXC_NET_VETH)
66a7c406 2978 goto clear_ifindices;
1bd8d726 2979
c869be20 2980 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2981 goto clear_ifindices;
1bd8d726 2982
8843fde4
CB
2983 if (netdev->priv.veth_attr.pair[0] != '\0')
2984 hostveth = netdev->priv.veth_attr.pair;
2985 else
2986 hostveth = netdev->priv.veth_attr.veth1;
2987 if (hostveth[0] == '\0')
66a7c406 2988 goto clear_ifindices;
8843fde4 2989
1bd8d726
CB
2990 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2991 handler->name, netdev,
2992 netns_path);
2993 if (ret < 0) {
1bd8d726 2994 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2995 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2996 goto clear_ifindices;
1bd8d726
CB
2997 }
2998 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2999 netdev->link);
66a7c406
CB
3000
3001clear_ifindices:
0858c829
CB
3002 /*
3003 * We need to clear any ifindices we recorded so liblxc won't
3004 * have cached stale data which would cause it to fail on
3005 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3006 */
3007 netdev->ifindex = 0;
3008 if (netdev->type == LXC_NET_PHYS) {
3009 netdev->priv.phys_attr.ifindex = 0;
3010 } else if (netdev->type == LXC_NET_VETH) {
3011 netdev->priv.veth_attr.veth1[0] = '\0';
3012 netdev->priv.veth_attr.ifindex = 0;
3013 }
1bd8d726
CB
3014 }
3015
bb84beda 3016 return true;
1bd8d726
CB
3017}
3018
6509154d 3019static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3020 struct lxc_list *cur, *next;
3021 struct lxc_inetdev *inet4dev;
3022 struct lxc_inet6dev *inet6dev;
3023 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3024 int err = 0;
3025 unsigned int lo_ifindex = 0;
6509154d 3026
3027 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3028 if (!lxc_list_empty(&netdev->ipv4)) {
3029 /* Check for net.ipv4.conf.[link].forwarding=1 */
3030 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
3031 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
596a002c 3032 return ret_set_errno(-1, EINVAL);
6509154d 3033 }
3034 }
3035
3036 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3037 if (!lxc_list_empty(&netdev->ipv6)) {
3038 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
3039 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
3040 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
596a002c 3041 return ret_set_errno(-1, EINVAL);
6509154d 3042 }
3043
3044 /* Check for net.ipv6.conf.[link].forwarding=1 */
3045 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
3046 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
596a002c 3047 return ret_set_errno(-1, EINVAL);
6509154d 3048 }
3049 }
3050
b670016a 3051 /* Perform IPVLAN specific checks. */
3052 if (netdev->type == LXC_NET_IPVLAN) {
3053 /* Check mode is l3s as other modes do not work with l2proxy. */
3054 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
3055 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
596a002c 3056 return ret_set_errno(-1, EINVAL);
b670016a 3057 }
3058
3059 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3060 lo_ifindex = if_nametoindex(loop_device);
b670016a 3061 if (lo_ifindex == 0) {
3ebffb98 3062 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
596a002c 3063 return ret_set_errno(-1, EINVAL);
b670016a 3064 }
3065 }
3066
6509154d 3067 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3068 inet4dev = cur->elem;
3069 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3070 return ret_set_errno(-1, -errno);
6509154d 3071
3072 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
596a002c 3073 return ret_set_errno(-1, EINVAL);
b670016a 3074
3075 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3076 if (netdev->type == LXC_NET_IPVLAN) {
3077 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
3078 if (err < 0) {
3ebffb98 3079 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
596a002c 3080 return ret_set_errno(-1, -err);
b670016a 3081 }
3082 }
6509154d 3083 }
3084
3085 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3086 inet6dev = cur->elem;
3087 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3088 return ret_set_errno(-1, -errno);
6509154d 3089
3090 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
596a002c 3091 return ret_set_errno(-1, EINVAL);
b670016a 3092
3093 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3094 if (netdev->type == LXC_NET_IPVLAN) {
3095 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
3096 if (err < 0) {
3ebffb98 3097 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
596a002c 3098 return ret_set_errno(-1, -err);
b670016a 3099 }
3100 }
6509154d 3101 }
3102
3103 return 0;
3104}
3105
/*
 * Undo the l2proxy setup of a single IPv4 address.
 *
 * ip         - address to clean up.
 * link       - device holding the neighbour proxy entry; skipped if empty.
 * lo_ifindex - loopback ifindex whose /32 route is removed; skipped if 0.
 *
 * Both steps are attempted even if the first one fails. Returns 0 if every
 * attempted step succeeded, otherwise -1 through ret_set_errno(-1, EINVAL).
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	unsigned int failures = 0;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return ret_set_errno(-1, EINVAL);
	}

	/* Drop the static route to the loopback device, if one was given. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failures++;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link, if one was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failures++;

	if (failures == 0)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3134
/*
 * Undo the l2proxy setup of a single IPv6 address.
 *
 * ip         - address to clean up.
 * link       - device holding the neighbour proxy entry; skipped if empty.
 * lo_ifindex - loopback ifindex whose /128 route is removed; skipped if 0.
 *
 * Both steps are attempted even if the first one fails. Returns 0 if every
 * attempted step succeeded, otherwise -1 through ret_set_errno(-1, EINVAL).
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	unsigned int failures = 0;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return ret_set_errno(-1, EINVAL);
	}

	/* Drop the static route to the loopback device, if one was given. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failures++;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link, if one was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failures++;

	if (failures == 0)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3163
6509154d 3164static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3165 unsigned int lo_ifindex = 0;
3166 unsigned int errCount = 0;
6509154d 3167 struct lxc_list *cur, *next;
3168 struct lxc_inetdev *inet4dev;
3169 struct lxc_inet6dev *inet6dev;
6509154d 3170
b670016a 3171 /* Perform IPVLAN specific checks. */
3172 if (netdev->type == LXC_NET_IPVLAN) {
3173 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3174 lo_ifindex = if_nametoindex(loop_device);
b670016a 3175 if (lo_ifindex == 0) {
3176 errCount++;
3ebffb98 3177 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3178 }
b670016a 3179 }
6509154d 3180
b670016a 3181 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3182 inet4dev = cur->elem;
3183 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3184 errCount++;
6509154d 3185 }
3186
3187 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3188 inet6dev = cur->elem;
b670016a 3189 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3190 errCount++;
6509154d 3191 }
3192
b670016a 3193 if (errCount > 0)
596a002c 3194 return ret_set_errno(-1, EINVAL);
6509154d 3195
3196 return 0;
3197}
3198
e389f2af 3199static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3200{
811ef482
CB
3201 struct lxc_list *iterator;
3202 struct lxc_list *network = &handler->conf->network;
3203
811ef482
CB
3204 lxc_list_for_each(iterator, network) {
3205 struct lxc_netdev *netdev = iterator->elem;
3206
3207 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3208 ERROR("Invalid network configuration type %d", netdev->type);
3209 return -1;
3210 }
3211
6509154d 3212 /* Setup l2proxy entries if enabled and used with a link property */
3213 if (netdev->l2proxy && netdev->link[0] != '\0') {
3214 if (lxc_setup_l2proxy(netdev)) {
3215 ERROR("Failed to setup l2proxy");
3216 return -1;
3217 }
3218 }
3219
811ef482
CB
3220 if (netdev_conf[netdev->type](handler, netdev)) {
3221 ERROR("Failed to create network device");
3222 return -1;
3223 }
811ef482
CB
3224 }
3225
3226 return 0;
3227}
3228
e389f2af 3229int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3230{
e389f2af
CB
3231 pid_t pid = handler->pid;
3232 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3233 struct lxc_list *iterator;
3234
e0010464 3235 if (am_guest_unpriv())
74c6e2b0 3236 return 0;
811ef482
CB
3237
3238 lxc_list_for_each(iterator, network) {
3dd78294 3239 __do_free char *physname = NULL;
e389f2af 3240 int ret;
811ef482
CB
3241 struct lxc_netdev *netdev = iterator->elem;
3242
811ef482
CB
3243 if (!netdev->ifindex)
3244 continue;
3245
3dd78294
CB
3246 if (netdev->type == LXC_NET_PHYS)
3247 physname = is_wlan(netdev->link);
3248
3249 if (physname)
3250 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
3251 else
3252 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
535e8859 3253 if (ret) {
6d1400b5 3254 errno = -ret;
24190194
CB
3255 SYSERROR("Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3256 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3257 return -1;
3258 }
3259
24190194
CB
3260 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3261 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3262 }
3263
3264 return 0;
3265}
3266
3c09b97c
CB
3267static int network_requires_advanced_setup(int type)
3268{
3269 if (type == LXC_NET_EMPTY)
3270 return false;
3271
3272 if (type == LXC_NET_NONE)
3273 return false;
3274
3275 return true;
3276}
3277
e389f2af 3278static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3279{
e389f2af
CB
3280 int hooks_version = handler->conf->hooks_version;
3281 const char *lxcname = handler->name;
3282 const char *lxcpath = handler->lxcpath;
3283 struct lxc_list *network = &handler->conf->network;
3284 pid_t pid = handler->pid;
74c6e2b0
CB
3285 struct lxc_list *iterator;
3286
74c6e2b0
CB
3287 lxc_list_for_each(iterator, network) {
3288 struct lxc_netdev *netdev = iterator->elem;
3289
3c09b97c 3290 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3291 continue;
3292
3293 if (netdev->type != LXC_NET_VETH) {
e389f2af 3294 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3295 lxc_net_type_to_str(netdev->type));
3296 return -1;
3297 }
3298
3299 if (netdev->mtu)
3300 INFO("mtu ignored due to insufficient privilege");
3301
e389f2af
CB
3302 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3303 pid, hooks_version))
74c6e2b0
CB
3304 return -1;
3305 }
3306
3307 return 0;
3308}
3309
1bd8d726 3310bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3311{
3312 int ret;
3313 struct lxc_list *iterator;
3314 struct lxc_list *network = &handler->conf->network;
1bd8d726 3315
811ef482
CB
3316 lxc_list_for_each(iterator, network) {
3317 char *hostveth = NULL;
3318 struct lxc_netdev *netdev = iterator->elem;
3319
3320 /* We can only delete devices whose ifindex we have. If we don't
3321 * have the index it means that we didn't create it.
3322 */
3323 if (!netdev->ifindex)
3324 continue;
3325
0104c121
CB
3326 /*
3327 * If the network device has been moved back from the
3328 * containers network namespace, update the ifindex.
3329 */
3330 netdev->ifindex = if_nametoindex(netdev->name);
3331
6509154d 3332 /* Delete l2proxy entries if enabled and used with a link property */
3333 if (netdev->l2proxy && netdev->link[0] != '\0') {
3334 if (lxc_delete_l2proxy(netdev))
3335 WARN("Failed to delete all l2proxy config");
3336 /* Don't return, let the network be cleaned up as normal. */
3337 }
3338
811ef482
CB
3339 if (netdev->type == LXC_NET_PHYS) {
3340 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3341 if (ret < 0)
3342 WARN("Failed to rename interface with index %d "
b809f232
CB
3343 "from \"%s\" to its initial name \"%s\"",
3344 netdev->ifindex, netdev->name, netdev->link);
0b154989 3345 else {
29589196
CB
3346 TRACE("Renamed interface with index %d from "
3347 "\"%s\" to its initial name \"%s\"",
3348 netdev->ifindex, netdev->name,
3349 netdev->link);
0b154989
TP
3350
3351 /* Restore original MTU */
3352 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3353 if (ret < 0) {
3354 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3355 netdev->link, netdev->priv.phys_attr.mtu);
3356 } else {
3357 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3358 netdev->link, netdev->priv.phys_attr.mtu);
3359 }
3360 }
b3259dc6
TP
3361
3362 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3363 goto clear_ifindices;
811ef482
CB
3364 }
3365
3366 ret = netdev_deconf[netdev->type](handler, netdev);
3367 if (ret < 0)
3368 WARN("Failed to deconfigure network device");
3369
811ef482 3370 if (netdev->type != LXC_NET_VETH)
66a7c406 3371 goto clear_ifindices;
811ef482 3372
811ef482
CB
3373 /* Explicitly delete host veth device to prevent lingering
3374 * devices. We had issues in LXD around this.
3375 */
de4855a8 3376 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3377 hostveth = netdev->priv.veth_attr.pair;
3378 else
3379 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3380 if (hostveth[0] == '\0')
66a7c406 3381 goto clear_ifindices;
811ef482
CB
3382
3383 ret = lxc_netdev_delete_by_name(hostveth);
3384 if (ret < 0) {
24548539
CB
3385 WARN("Failed to remove interface \"%s\" from \"%s\"",
3386 hostveth, netdev->link);
66a7c406 3387 goto clear_ifindices;
811ef482
CB
3388 }
3389 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3390
c869be20 3391 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3392 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3393 netdev->ifindex = 0;
3394 netdev->priv.veth_attr.ifindex = 0;
3395 goto clear_ifindices;
811ef482
CB
3396 }
3397
3398 /* Delete the openvswitch port. */
3399 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3400 if (ret < 0)
3401 WARN("Failed to remove port \"%s\" from openvswitch "
3402 "bridge \"%s\"", hostveth, netdev->link);
3403 else
3404 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3405 hostveth, netdev->link);
3406
66a7c406 3407clear_ifindices:
ad2ddfcd 3408 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3409 * have cached stale data which would cause it to fail on reboot
3410 * we're we don't re-read the on-disk config file.
3411 */
3412 netdev->ifindex = 0;
3413 if (netdev->type == LXC_NET_PHYS) {
3414 netdev->priv.phys_attr.ifindex = 0;
3415 } else if (netdev->type == LXC_NET_VETH) {
3416 netdev->priv.veth_attr.veth1[0] = '\0';
3417 netdev->priv.veth_attr.ifindex = 0;
3418 }
811ef482
CB
3419 }
3420
bb84beda 3421 return true;
811ef482
CB
3422}
3423
3424int lxc_requests_empty_network(struct lxc_handler *handler)
3425{
3426 struct lxc_list *network = &handler->conf->network;
3427 struct lxc_list *iterator;
3428 bool found_none = false, found_nic = false;
3429
3430 if (lxc_list_empty(network))
3431 return 0;
3432
3433 lxc_list_for_each(iterator, network) {
3434 struct lxc_netdev *netdev = iterator->elem;
3435
3436 if (netdev->type == LXC_NET_NONE)
3437 found_none = true;
3438 else
3439 found_nic = true;
3440 }
3441 if (found_none && !found_nic)
3442 return 1;
3443 return 0;
3444}
3445
3446/* try to move physical nics to the init netns */
b809f232 3447int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3448{
3449 int ret;
b809f232 3450 int oldfd;
811ef482 3451 char ifname[IFNAMSIZ];
b809f232 3452 struct lxc_list *iterator;
28d9e29e 3453 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3454 struct lxc_conf *conf = handler->conf;
811ef482 3455
b809f232
CB
3456 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3457 * the parent network namespace. We won't have this capability if we are
3458 * unprivileged.
3459 */
d0fbc7ba 3460 if (!handler->am_root)
b809f232 3461 return 0;
811ef482 3462
b809f232 3463 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3464
0037ab49 3465 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3466 if (oldfd < 0) {
3467 SYSERROR("Failed to preserve network namespace");
b809f232 3468 return -1;
811ef482
CB
3469 }
3470
b809f232 3471 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3472 if (ret < 0) {
3473 SYSERROR("Failed to enter network namespace");
3474 close(oldfd);
b809f232 3475 return -1;
811ef482
CB
3476 }
3477
b809f232
CB
3478 lxc_list_for_each(iterator, &conf->network) {
3479 struct lxc_netdev *netdev = iterator->elem;
811ef482 3480
b809f232
CB
3481 if (netdev->type != LXC_NET_PHYS)
3482 continue;
3483
3484 /* Retrieve the name of the interface in the container's network
3485 * namespace.
3486 */
3487 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3488 WARN("No interface corresponding to ifindex %d",
b809f232 3489 netdev->ifindex);
811ef482
CB
3490 continue;
3491 }
b809f232 3492
0037ab49 3493 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3494 if (ret < 0)
811ef482
CB
3495 WARN("Error moving network device \"%s\" back to "
3496 "network namespace", ifname);
b809f232
CB
3497 else
3498 TRACE("Moved network device \"%s\" back to network "
3499 "namespace", ifname);
811ef482 3500 }
811ef482 3501
b809f232 3502 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3503 close(oldfd);
b809f232
CB
3504 if (ret < 0) {
3505 SYSERROR("Failed to enter network namespace");
3506 return -1;
3507 }
3508
3509 return 0;
811ef482
CB
3510}
3511
3512static int setup_hw_addr(char *hwaddr, const char *ifname)
3513{
3514 struct sockaddr sockaddr;
3515 struct ifreq ifr;
6d1400b5 3516 int ret, fd;
811ef482
CB
3517
3518 ret = lxc_convert_mac(hwaddr, &sockaddr);
3519 if (ret) {
6d1400b5 3520 errno = -ret;
3521 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3522 return -1;
3523 }
3524
3525 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3526 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3527 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3528
ad9429e5 3529 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3530 if (fd < 0)
3531 return -1;
3532
3533 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3534 if (ret)
6d1400b5 3535 SYSERROR("Failed to perform ioctl");
3536
3537 close(fd);
811ef482
CB
3538
3539 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3540 ifr.ifr_name);
3541
3542 return ret;
3543}
3544
3545static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3546{
3547 struct lxc_list *iterator;
3548 int err;
3549
3550 lxc_list_for_each(iterator, ip) {
3551 struct lxc_inetdev *inetdev = iterator->elem;
3552
3553 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3554 &inetdev->bcast, inetdev->prefix);
3555 if (err) {
6d1400b5 3556 errno = -err;
3557 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3558 "with ifindex %d", ifindex);
811ef482
CB
3559 return -1;
3560 }
3561 }
3562
3563 return 0;
3564}
3565
3566static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3567{
3568 struct lxc_list *iterator;
3569 int err;
3570
3571 lxc_list_for_each(iterator, ip) {
3572 struct lxc_inet6dev *inet6dev = iterator->elem;
3573
3574 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3575 &inet6dev->mcast, &inet6dev->acast,
3576 inet6dev->prefix);
3577 if (err) {
6d1400b5 3578 errno = -err;
3579 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3580 "with ifindex %d", ifindex);
811ef482
CB
3581 return -1;
3582 }
3583 }
3584
3585 return 0;
3586}
3587
3588static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3589{
3590 char ifname[IFNAMSIZ];
3591 int err;
811ef482 3592 char *current_ifname = ifname;
009d6127 3593 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3594
3595 /* empty network namespace */
3596 if (!netdev->ifindex) {
3597 if (netdev->flags & IFF_UP) {
3598 err = lxc_netdev_up("lo");
3599 if (err) {
6d1400b5 3600 errno = -err;
3601 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3602 return -1;
3603 }
3604 }
3605
3606 if (netdev->type == LXC_NET_EMPTY)
3607 return 0;
3608
3609 if (netdev->type == LXC_NET_NONE)
3610 return 0;
3611
e389f2af
CB
3612 netdev->ifindex = if_nametoindex(netdev->created_name);
3613 if (!netdev->ifindex)
3614 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3c09b97c 3615 netdev->created_name ?: "(null)");
811ef482
CB
3616 }
3617
3618 /* get the new ifindex in case of physical netdev */
3619 if (netdev->type == LXC_NET_PHYS) {
3620 netdev->ifindex = if_nametoindex(netdev->link);
3621 if (!netdev->ifindex) {
3622 ERROR("Failed to get ifindex for network device \"%s\"",
3623 netdev->link);
3624 return -1;
3625 }
3626 }
3627
3628 /* retrieve the name of the interface */
3629 if (!if_indextoname(netdev->ifindex, current_ifname)) {
e389f2af
CB
3630 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3631 netdev->ifindex);
811ef482
CB
3632 return -1;
3633 }
3634
e389f2af 3635 /* Default: let the system choose an interface name.
811ef482
CB
3636 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3637 * netlink will replace the format specifier with an appropriate index.
3638 */
de4855a8
CB
3639 if (netdev->name[0] == '\0') {
3640 if (netdev->type == LXC_NET_PHYS)
94b1cade 3641 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3642 else
94b1cade 3643 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3644 }
811ef482
CB
3645
3646 /* rename the interface name */
e389f2af
CB
3647 if (strcmp(current_ifname, netdev->name) != 0) {
3648 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
811ef482 3649 if (err) {
6d1400b5 3650 errno = -err;
3651 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
e389f2af 3652 current_ifname, netdev->name);
811ef482
CB
3653 return -1;
3654 }
e389f2af
CB
3655
3656 TRACE("Renamed network device from \"%s\" to \"%s\"",
3657 current_ifname, netdev->name);
811ef482
CB
3658 }
3659
3660 /* Re-read the name of the interface because its name has changed
3661 * and would be automatically allocated by the system
3662 */
3663 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3664 ERROR("Failed get name for network device with ifindex %d",
3665 netdev->ifindex);
3666 return -1;
3667 }
3668
790255cf
CB
3669 /* Now update the recorded name of the network device to reflect the
3670 * name of the network device in the child's network namespace. We will
3671 * later on send this information back to the parent.
3672 */
94b1cade 3673 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3674
811ef482
CB
3675 /* set a mac address */
3676 if (netdev->hwaddr) {
3677 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3678 ERROR("Failed to setup hw address for network device \"%s\"",
3679 current_ifname);
3680 return -1;
3681 }
3682 }
3683
3684 /* setup ipv4 addresses on the interface */
3685 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3686 ERROR("Failed to setup ip addresses for network device \"%s\"",
e389f2af 3687 current_ifname);
811ef482
CB
3688 return -1;
3689 }
3690
3691 /* setup ipv6 addresses on the interface */
3692 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3693 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
e389f2af 3694 current_ifname);
811ef482
CB
3695 return -1;
3696 }
3697
3698 /* set the network device up */
3699 if (netdev->flags & IFF_UP) {
811ef482
CB
3700 err = lxc_netdev_up(current_ifname);
3701 if (err) {
6d1400b5 3702 errno = -err;
3703 SYSERROR("Failed to set network device \"%s\" up",
3704 current_ifname);
811ef482
CB
3705 return -1;
3706 }
3707
3708 /* the network is up, make the loopback up too */
3709 err = lxc_netdev_up("lo");
3710 if (err) {
6d1400b5 3711 errno = -err;
3712 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3713 return -1;
3714 }
3715 }
3716
811ef482 3717 /* setup ipv4 gateway on the interface */
a2f9a670 3718 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3719 if (!(netdev->flags & IFF_UP)) {
3720 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3721 "\"%s\" when not bringing up the interface", current_ifname);
811ef482
CB
3722 return -1;
3723 }
3724
3725 if (lxc_list_empty(&netdev->ipv4)) {
3726 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3727 "\"%s\" when not assigning an address", current_ifname);
811ef482
CB
3728 return -1;
3729 }
3730
a2f9a670 3731 /* Setup device route if ipv4_gateway_dev is enabled */
3732 if (netdev->ipv4_gateway_dev) {
3733 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3734 if (err < 0) {
3735 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
e389f2af 3736 current_ifname);
596a002c 3737 return ret_set_errno(-1, -err);
811ef482 3738 }
a2f9a670 3739 } else {
009d6127 3740 /* Check the gateway address is valid */
3741 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3742 return ret_set_errno(-1, errno);
009d6127 3743
3744 /* Try adding a default route to the gateway address */
811ef482 3745 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3746 if (err < 0) {
3747 /* If adding the default route fails, this could be because the
3748 * gateway address is in a different subnet to the container's address.
3749 * To work around this, we try adding a static device route to the
3750 * gateway address first, and then try again.
3751 */
a2f9a670 3752 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3753 if (err < 0) {
a2f9a670 3754 errno = -err;
009d6127 3755 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
e389f2af 3756 bufinet4, current_ifname);
009d6127 3757 return -1;
a2f9a670 3758 }
6d1400b5 3759
a2f9a670 3760 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3761 if (err < 0) {
a2f9a670 3762 errno = -err;
009d6127 3763 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
e389f2af 3764 bufinet4, current_ifname);
a2f9a670 3765 return -1;
811ef482 3766 }
811ef482
CB
3767 }
3768 }
3769 }
3770
3771 /* setup ipv6 gateway on the interface */
a2f9a670 3772 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3773 if (!(netdev->flags & IFF_UP)) {
e389f2af
CB
3774 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3775 current_ifname);
811ef482
CB
3776 return -1;
3777 }
3778
3779 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af
CB
3780 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3781 current_ifname);
811ef482
CB
3782 return -1;
3783 }
3784
a2f9a670 3785 /* Setup device route if ipv6_gateway_dev is enabled */
3786 if (netdev->ipv6_gateway_dev) {
3787 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3788 if (err < 0) {
3789 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
e389f2af 3790 current_ifname);
596a002c 3791 return ret_set_errno(-1, -err);
811ef482 3792 }
a2f9a670 3793 } else {
009d6127 3794 /* Check the gateway address is valid */
3795 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3796 return ret_set_errno(-1, errno);
009d6127 3797
3798 /* Try adding a default route to the gateway address */
811ef482 3799 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3800 if (err < 0) {
3801 /* If adding the default route fails, this could be because the
3802 * gateway address is in a different subnet to the container's address.
3803 * To work around this, we try adding a static device route to the
3804 * gateway address first, and then try again.
3805 */
a2f9a670 3806 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3807 if (err < 0) {
a2f9a670 3808 errno = -err;
009d6127 3809 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
e389f2af 3810 bufinet6, current_ifname);
009d6127 3811 return -1;
a2f9a670 3812 }
6d1400b5 3813
a2f9a670 3814 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3815 if (err < 0) {
a2f9a670 3816 errno = -err;
009d6127 3817 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
e389f2af 3818 bufinet6, current_ifname);
a2f9a670 3819 return -1;
811ef482 3820 }
811ef482
CB
3821 }
3822 }
3823 }
3824
74c6e2b0 3825 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3826
3827 return 0;
3828}
3829
3830int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3831 struct lxc_list *network)
3832{
3833 struct lxc_list *iterator;
811ef482 3834
811ef482 3835 lxc_list_for_each(iterator, network) {
e389f2af 3836 struct lxc_netdev *netdev = iterator->elem;
811ef482 3837
811ef482 3838 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
e389f2af 3839 ERROR("Failed to setup netdev");
811ef482
CB
3840 return -1;
3841 }
3842 }
3843
3844 if (!lxc_list_empty(network))
e389f2af 3845 INFO("Network has been setup");
811ef482
CB
3846
3847 return 0;
3848}
7ab1ba02 3849
3c09b97c 3850int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3851{
3852 struct lxc_list *iterator;
3853 struct lxc_list *network = &handler->conf->network;
3854 int data_sock = handler->data_sock[0];
3855
7ab1ba02
CB
3856 lxc_list_for_each(iterator, network) {
3857 int ret;
3858 struct lxc_netdev *netdev = iterator->elem;
3859
3c09b97c 3860 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3861 continue;
3862
7fbb15ec 3863 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3864 if (ret < 0)
7ab1ba02 3865 return -1;
e389f2af
CB
3866
3867 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3868 if (ret < 0)
3869 return -1;
3870
3871 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3872 }
3873
3874 return 0;
3875}
3876
3c09b97c 3877int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3878{
3879 struct lxc_list *iterator;
3880 struct lxc_list *network = &handler->conf->network;
3881 int data_sock = handler->data_sock[1];
3882
7ab1ba02
CB
3883 lxc_list_for_each(iterator, network) {
3884 int ret;
3885 struct lxc_netdev *netdev = iterator->elem;
3886
3c09b97c 3887 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3888 continue;
3889
e3233f26 3890 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3891 if (ret < 0)
7ab1ba02 3892 return -1;
e389f2af
CB
3893
3894 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3895 if (ret < 0)
3896 return -1;
54256301 3897
e389f2af 3898 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3899 }
3900
3901 return 0;
3902}
a1ae535a
CB
3903
3904int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3905{
3906 struct lxc_list *iterator, *network;
3907 int data_sock = handler->data_sock[0];
3908
3909 if (!handler->am_root)
3910 return 0;
3911
3912 network = &handler->conf->network;
3913 lxc_list_for_each(iterator, network) {
3914 int ret;
3915 struct lxc_netdev *netdev = iterator->elem;
3916
3917 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3918 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3919 if (ret < 0)
7729f8e5 3920 return -1;
a1ae535a
CB
3921
3922 /* Send network device ifindex in the child's namespace to
3923 * parent.
3924 */
7fbb15ec 3925 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3926 if (ret < 0)
7729f8e5 3927 return -1;
a1ae535a
CB
3928 }
3929
e389f2af
CB
3930 if (!lxc_list_empty(network))
3931 TRACE("Sent network device names and ifindices to parent");
3932
a1ae535a 3933 return 0;
a1ae535a
CB
3934}
3935
3936int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3937{
3938 struct lxc_list *iterator, *network;
3939 int data_sock = handler->data_sock[1];
3940
3941 if (!handler->am_root)
3942 return 0;
3943
3944 network = &handler->conf->network;
3945 lxc_list_for_each(iterator, network) {
3946 int ret;
3947 struct lxc_netdev *netdev = iterator->elem;
3948
3949 /* Receive network device name in the child's namespace to
3950 * parent.
3951 */
e3233f26 3952 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3953 if (ret < 0)
7729f8e5 3954 return -1;
a1ae535a
CB
3955
3956 /* Receive network device ifindex in the child's namespace to
3957 * parent.
3958 */
e3233f26 3959 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3960 if (ret < 0)
7729f8e5 3961 return -1;
a1ae535a
CB
3962 }
3963
3964 return 0;
a1ae535a 3965}
bb84beda
CB
3966
3967void lxc_delete_network(struct lxc_handler *handler)
3968{
3969 bool bret;
3970
3971 if (handler->am_root)
3972 bret = lxc_delete_network_priv(handler);
3973 else
3974 bret = lxc_delete_network_unpriv(handler);
3975 if (!bret)
3976 DEBUG("Failed to delete network devices");
3977 else
3978 DEBUG("Deleted network devices");
3979}
1cd95214 3980
1cd95214
CB
3981int lxc_netns_set_nsid(int fd)
3982{
41a3300d 3983 int ret;
0ce60f0d
CB
3984 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3985 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3986 NLMSG_ALIGN(1024)];
1cd95214 3987 struct nl_handler nlh;
0ce60f0d
CB
3988 struct nlmsghdr *hdr;
3989 struct rtgenmsg *msg;
bfcedc7e 3990 int saved_errno;
9d036caa
CB
3991 const __s32 ns_id = -1;
3992 const __u32 netns_fd = fd;
1cd95214
CB
3993
3994 ret = netlink_open(&nlh, NETLINK_ROUTE);
3995 if (ret < 0)
41a3300d 3996 return -1;
1cd95214 3997
0ce60f0d 3998 memset(buf, 0, sizeof(buf));
6ce39620
CB
3999
4000#pragma GCC diagnostic push
4001#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4002 hdr = (struct nlmsghdr *)buf;
4003 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4004#pragma GCC diagnostic pop
1cd95214 4005
0ce60f0d
CB
4006 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4007 hdr->nlmsg_type = RTM_NEWNSID;
4008 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4009 hdr->nlmsg_pid = 0;
4010 hdr->nlmsg_seq = RTM_NEWNSID;
4011 msg->rtgen_family = AF_UNSPEC;
1cd95214 4012
9d036caa
CB
4013 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4014 if (ret < 0)
4015 goto on_error;
4016
4017 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4018 if (ret < 0)
4019 goto on_error;
1cd95214 4020
9fbbc427 4021 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
4022
4023on_error:
bfcedc7e 4024 saved_errno = errno;
1cd95214 4025 netlink_close(&nlh);
bfcedc7e 4026 errno = saved_errno;
1cd95214 4027
9d036caa 4028 return ret;
1cd95214 4029}
938980ba
CB
4030
/*
 * Index a sequence of routing attributes by type.
 *
 * @tb must have room for @max + 1 entries; it is zeroed first. Starting at
 * @rta and covering @len bytes, the first attribute of each type in
 * [0, @max] is stored in tb[type]. Later duplicates and types above @max
 * are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short attr_type = rta->rta_type;

		/* Skip out-of-range types and keep the first occurrence. */
		if (attr_type > max || tb[attr_type])
			continue;

		tb[attr_type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
4050
/*
 * Read the payload of routing attribute @rta as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() so that the const qualifier of @rta
 * is not cast away for a typed load and no alignment of the payload is
 * assumed. The caller must make sure the payload holds at least
 * sizeof(__s32) bytes.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
4055
/*
 * Address of the first routing attribute following a struct rtgenmsg that
 * starts at @r, i.e. @r advanced by the netlink-aligned size of the header.
 * Only defined here when no definition is already in scope.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4060
4061int lxc_netns_get_nsid(int fd)
4062{
4063 int ret;
4064 ssize_t len;
4065 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4066 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4067 NLMSG_ALIGN(1024)];
938980ba
CB
4068 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
4069 struct nl_handler nlh;
4070 struct nlmsghdr *hdr;
4071 struct rtgenmsg *msg;
4072 int saved_errno;
4073 __u32 netns_fd = fd;
4074
4075 ret = netlink_open(&nlh, NETLINK_ROUTE);
4076 if (ret < 0)
4077 return -1;
4078
4079 memset(buf, 0, sizeof(buf));
6ce39620
CB
4080
4081#pragma GCC diagnostic push
4082#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4083 hdr = (struct nlmsghdr *)buf;
4084 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4085#pragma GCC diagnostic pop
938980ba
CB
4086
4087 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4088 hdr->nlmsg_type = RTM_GETNSID;
4089 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4090 hdr->nlmsg_pid = 0;
4091 hdr->nlmsg_seq = RTM_GETNSID;
4092 msg->rtgen_family = AF_UNSPEC;
4093
9d036caa
CB
4094 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4095 if (ret == 0)
4096 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 4097
938980ba
CB
4098 saved_errno = errno;
4099 netlink_close(&nlh);
4100 errno = saved_errno;
4101 if (ret < 0)
4102 return -1;
4103
9d036caa 4104 errno = EINVAL;
938980ba
CB
4105 msg = NLMSG_DATA(hdr);
4106 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4107 if (len < 0)
4108 return -1;
4109
6ce39620
CB
4110#pragma GCC diagnostic push
4111#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4112 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4113 if (tb[__LXC_NETNSA_NSID])
4114 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4115#pragma GCC diagnostic pop
938980ba
CB
4116
4117 return -1;
4118}
e389f2af
CB
4119
4120int lxc_create_network(struct lxc_handler *handler)
4121{
4122 int ret;
4123
e389f2af
CB
4124 if (handler->am_root) {
4125 ret = lxc_create_network_priv(handler);
4126 if (ret)
4127 return -1;
4128
4129 return lxc_network_move_created_netdev_priv(handler);
4130 }
4131
4132 return lxc_create_network_unpriv(handler);
4133}