]> git.proxmox.com Git - mirror_frr.git/blame - zebra/rt_netlink.c
Merge pull request #1745 from mkanjari/type5-route-policy
[mirror_frr.git] / zebra / rt_netlink.c
CommitLineData
718e3744 1/* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21#include <zebra.h>
ddfeb486
DL
22
23#ifdef HAVE_NETLINK
24
8ccc7e80 25#include <net/if_arp.h>
ba777396
RW
26#include <linux/lwtunnel.h>
27#include <linux/mpls_iptunnel.h>
28#include <linux/neighbour.h>
29#include <linux/rtnetlink.h>
718e3744 30
31/* Hack for GNU libc version 2. */
32#ifndef MSG_TRUNC
33#define MSG_TRUNC 0x20
34#endif /* MSG_TRUNC */
35
36#include "linklist.h"
37#include "if.h"
38#include "log.h"
39#include "prefix.h"
40#include "connected.h"
41#include "table.h"
26e2ae36 42#include "memory.h"
4a1ab8e4 43#include "zebra_memory.h"
718e3744 44#include "rib.h"
e04ab74d 45#include "thread.h"
edd7c245 46#include "privs.h"
fb018d25 47#include "nexthop.h"
78104b9b 48#include "vrf.h"
5e6a74d8 49#include "vty.h"
40c7bdb0 50#include "mpls.h"
13d60d35 51#include "vxlan.h"
718e3744 52
53#include "zebra/zserv.h"
fe18ee2d 54#include "zebra/zebra_ns.h"
7c551956 55#include "zebra/zebra_vrf.h"
6621ca86 56#include "zebra/rt.h"
718e3744 57#include "zebra/redistribute.h"
58#include "zebra/interface.h"
59#include "zebra/debug.h"
12f6fb97 60#include "zebra/rtadv.h"
567b877d 61#include "zebra/zebra_ptm.h"
40c7bdb0 62#include "zebra/zebra_mpls.h"
1fdc9eae 63#include "zebra/kernel_netlink.h"
64#include "zebra/rt_netlink.h"
e3be0432 65#include "zebra/zebra_mroute.h"
2232a77c 66#include "zebra/zebra_vxlan.h"
e3be0432 67
40c7bdb0 68#ifndef AF_MPLS
69#define AF_MPLS 28
70#endif
71
2232a77c 72static vlanid_t filter_vlan = 0;
73
d62a17ae 74struct gw_family_t {
75 u_int16_t filler;
76 u_int16_t family;
77 union g_addr gate;
40c7bdb0 78};
79
8755598a
DS
80char ipv4_ll_buf[16] = "169.254.0.1";
81struct in_addr ipv4_ll;
82
83/*
84 * The ipv4_ll data structure is used for all 5549
85 * additions to the kernel. Let's figure out the
86 * correct value one time instead for every
87 * install/remove of a 5549 type route
88 */
d62a17ae 89void rt_netlink_init(void)
8755598a 90{
d62a17ae 91 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
8755598a
DS
92}
93
23b1f334
DD
94static inline int is_selfroute(int proto)
95{
d62a17ae 96 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
97 || (proto == RTPROT_STATIC) || (proto == RTPROT_ZEBRA)
98 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
99 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
915902cb 100 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
8a71d93d 101 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)) {
d62a17ae 102 return 1;
103 }
104
105 return 0;
23b1f334
DD
106}
107
915902cb 108static inline int zebra2proto(int proto)
23b1f334 109{
d62a17ae 110 switch (proto) {
111 case ZEBRA_ROUTE_BABEL:
112 proto = RTPROT_BABEL;
113 break;
114 case ZEBRA_ROUTE_BGP:
115 proto = RTPROT_BGP;
116 break;
117 case ZEBRA_ROUTE_OSPF:
118 case ZEBRA_ROUTE_OSPF6:
119 proto = RTPROT_OSPF;
120 break;
121 case ZEBRA_ROUTE_STATIC:
122 proto = RTPROT_STATIC;
123 break;
124 case ZEBRA_ROUTE_ISIS:
125 proto = RTPROT_ISIS;
126 break;
127 case ZEBRA_ROUTE_RIP:
128 proto = RTPROT_RIP;
129 break;
130 case ZEBRA_ROUTE_RIPNG:
131 proto = RTPROT_RIPNG;
132 break;
133 case ZEBRA_ROUTE_NHRP:
134 proto = RTPROT_NHRP;
135 break;
136 case ZEBRA_ROUTE_EIGRP:
137 proto = RTPROT_EIGRP;
138 break;
139 case ZEBRA_ROUTE_LDP:
140 proto = RTPROT_LDP;
141 break;
8a71d93d
DS
142 case ZEBRA_ROUTE_SHARP:
143 proto = RTPROT_SHARP;
144 break;
d62a17ae 145 default:
146 proto = RTPROT_ZEBRA;
147 break;
148 }
149
150 return proto;
23b1f334
DD
151}
152
915902cb
DS
153static inline int proto2zebra(int proto, int family)
154{
155 switch (proto) {
156 case RTPROT_BABEL:
157 proto = ZEBRA_ROUTE_BABEL;
158 break;
159 case RTPROT_BGP:
160 proto = ZEBRA_ROUTE_BGP;
161 break;
162 case RTPROT_OSPF:
163 proto = (family == AFI_IP) ?
164 ZEBRA_ROUTE_OSPF : ZEBRA_ROUTE_OSPF6;
165 break;
166 case RTPROT_ISIS:
167 proto = ZEBRA_ROUTE_ISIS;
168 break;
169 case RTPROT_RIP:
170 proto = ZEBRA_ROUTE_RIP;
171 break;
172 case RTPROT_RIPNG:
173 proto = ZEBRA_ROUTE_RIPNG;
174 break;
175 case RTPROT_NHRP:
176 proto = ZEBRA_ROUTE_NHRP;
177 break;
178 case RTPROT_EIGRP:
179 proto = ZEBRA_ROUTE_EIGRP;
180 break;
181 case RTPROT_LDP:
182 proto = ZEBRA_ROUTE_LDP;
183 break;
184 case RTPROT_STATIC:
185 proto = ZEBRA_ROUTE_STATIC;
186 break;
187 default:
188 proto = ZEBRA_ROUTE_KERNEL;
189 break;
190 }
191 return proto;
192}
193
12f6fb97
DS
194/*
195Pending: create an efficient table_id-based lookup (in a tree/hash)
196 */
d62a17ae 197static vrf_id_t vrf_lookup_by_table(u_int32_t table_id)
12f6fb97 198{
d62a17ae 199 struct vrf *vrf;
200 struct zebra_vrf *zvrf;
12f6fb97 201
a2addae8 202 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
d62a17ae 203 if ((zvrf = vrf->info) == NULL || (zvrf->table_id != table_id))
204 continue;
12f6fb97 205
d62a17ae 206 return zvrf_id(zvrf);
207 }
12f6fb97 208
d62a17ae 209 return VRF_DEFAULT;
12f6fb97
DS
210}
211
718e3744 212/* Looking up routing table by netlink interface. */
d62a17ae 213static int netlink_route_change_read_unicast(struct sockaddr_nl *snl,
214 struct nlmsghdr *h, ns_id_t ns_id,
215 int startup)
718e3744 216{
d62a17ae 217 int len;
218 struct rtmsg *rtm;
219 struct rtattr *tb[RTA_MAX + 1];
220 u_char flags = 0;
221 struct prefix p;
792fa92e 222 struct prefix_ipv6 src_p = {};
d62a17ae 223 vrf_id_t vrf_id = VRF_DEFAULT;
224
225 char anyaddr[16] = {0};
226
915902cb 227 int proto = ZEBRA_ROUTE_KERNEL;
d62a17ae 228 int index = 0;
229 int table;
230 int metric = 0;
231 u_int32_t mtu = 0;
25715c7e 232 uint8_t distance = 0;
4e40b6d6 233 route_tag_t tag = 0;
d62a17ae 234
235 void *dest = NULL;
236 void *gate = NULL;
237 void *prefsrc = NULL; /* IPv4 preferred source host address */
238 void *src = NULL; /* IPv6 srcdest source prefix */
e655a03c 239 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
d62a17ae 240
241 rtm = NLMSG_DATA(h);
242
243 if (startup && h->nlmsg_type != RTM_NEWROUTE)
244 return 0;
e655a03c
DL
245 switch (rtm->rtm_type) {
246 case RTN_UNICAST:
247 break;
248 case RTN_BLACKHOLE:
249 bh_type = BLACKHOLE_NULL;
250 break;
251 case RTN_UNREACHABLE:
252 bh_type = BLACKHOLE_REJECT;
253 break;
254 case RTN_PROHIBIT:
255 bh_type = BLACKHOLE_ADMINPROHIB;
256 break;
257 default:
d62a17ae 258 return 0;
e655a03c 259 }
d62a17ae 260
261 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
262 if (len < 0)
263 return -1;
264
265 memset(tb, 0, sizeof tb);
266 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
267
268 if (rtm->rtm_flags & RTM_F_CLONED)
269 return 0;
270 if (rtm->rtm_protocol == RTPROT_REDIRECT)
271 return 0;
272 if (rtm->rtm_protocol == RTPROT_KERNEL)
273 return 0;
274
275 if (!startup && is_selfroute(rtm->rtm_protocol)
276 && h->nlmsg_type == RTM_NEWROUTE)
277 return 0;
278
279 /* We don't care about change notifications for the MPLS table. */
280 /* TODO: Revisit this. */
281 if (rtm->rtm_family == AF_MPLS)
282 return 0;
283
284 /* Table corresponding to route. */
285 if (tb[RTA_TABLE])
286 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
287 else
288 table = rtm->rtm_table;
289
290 /* Map to VRF */
291 vrf_id = vrf_lookup_by_table(table);
292 if (vrf_id == VRF_DEFAULT) {
293 if (!is_zebra_valid_kernel_table(table)
294 && !is_zebra_main_routing_table(table))
295 return 0;
296 }
297
298 /* Route which inserted by Zebra. */
915902cb 299 if (is_selfroute(rtm->rtm_protocol)) {
d62a17ae 300 flags |= ZEBRA_FLAG_SELFROUTE;
915902cb
DS
301 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
302 }
d62a17ae 303 if (tb[RTA_OIF])
304 index = *(int *)RTA_DATA(tb[RTA_OIF]);
305
306 if (tb[RTA_DST])
307 dest = RTA_DATA(tb[RTA_DST]);
308 else
309 dest = anyaddr;
310
311 if (tb[RTA_SRC])
312 src = RTA_DATA(tb[RTA_SRC]);
313 else
314 src = anyaddr;
315
316 if (tb[RTA_PREFSRC])
317 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
318
319 if (tb[RTA_GATEWAY])
320 gate = RTA_DATA(tb[RTA_GATEWAY]);
321
f19435a8
DS
322 if (tb[RTA_PRIORITY])
323 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
d62a17ae 324
4e40b6d6
KK
325#if defined(SUPPORT_REALMS)
326 if (tb[RTA_FLOW])
327 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
328#endif
329
f19435a8
DS
330 if (tb[RTA_METRICS]) {
331 struct rtattr *mxrta[RTAX_MAX + 1];
d62a17ae 332
f19435a8
DS
333 memset(mxrta, 0, sizeof mxrta);
334 netlink_parse_rtattr(mxrta, RTAX_MAX,
335 RTA_DATA(tb[RTA_METRICS]),
336 RTA_PAYLOAD(tb[RTA_METRICS]));
d62a17ae 337
f19435a8
DS
338 if (mxrta[RTAX_MTU])
339 mtu = *(u_int32_t *)RTA_DATA(mxrta[RTAX_MTU]);
d62a17ae 340 }
341
342 if (rtm->rtm_family == AF_INET) {
343 p.family = AF_INET;
344 memcpy(&p.u.prefix4, dest, 4);
345 p.prefixlen = rtm->rtm_dst_len;
346
347 src_p.prefixlen =
348 0; // Forces debug below to not display anything
349 } else if (rtm->rtm_family == AF_INET6) {
350 p.family = AF_INET6;
351 memcpy(&p.u.prefix6, dest, 16);
352 p.prefixlen = rtm->rtm_dst_len;
353
354 src_p.family = AF_INET6;
355 memcpy(&src_p.prefix, src, 16);
356 src_p.prefixlen = rtm->rtm_src_len;
357 }
358
359 if (rtm->rtm_src_len != 0) {
360 char buf[PREFIX_STRLEN];
361 zlog_warn(
362 "unsupported IPv[4|6] sourcedest route (dest %s vrf %u)",
363 prefix2str(&p, buf, sizeof(buf)), vrf_id);
364 return 0;
365 }
366
25715c7e
DS
367 /*
368 * For ZEBRA_ROUTE_KERNEL types:
369 *
370 * The metric/priority of the route received from the kernel
371 * is a 32 bit number. We are going to interpret the high
372 * order byte as the Admin Distance and the low order 3 bytes
373 * as the metric.
374 *
375 * This will allow us to do two things:
376 * 1) Allow the creation of kernel routes that can be
377 * overridden by zebra.
378 * 2) Allow the old behavior for 'most' kernel route types
379 * if a user enters 'ip route ...' v4 routes get a metric
380 * of 0 and v6 routes get a metric of 1024. Both of these
381 * values will end up with a admin distance of 0, which
382 * will cause them to win for the purposes of zebra.
383 */
384 if (proto == ZEBRA_ROUTE_KERNEL) {
385 distance = (metric >> 24) & 0xFF;
386 metric = (metric & 0x00FFFFFF);
387 }
388
d62a17ae 389 if (IS_ZEBRA_DEBUG_KERNEL) {
390 char buf[PREFIX_STRLEN];
391 char buf2[PREFIX_STRLEN];
392 zlog_debug(
25715c7e 393 "%s %s%s%s vrf %u metric: %d Admin Distance: %d", nl_msg_type_to_str(h->nlmsg_type),
d62a17ae 394 prefix2str(&p, buf, sizeof(buf)),
395 src_p.prefixlen ? " from " : "",
396 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
397 : "",
25715c7e 398 vrf_id, metric, distance);
d62a17ae 399 }
400
401 afi_t afi = AFI_IP;
402 if (rtm->rtm_family == AF_INET6)
403 afi = AFI_IP6;
404
405 if (h->nlmsg_type == RTM_NEWROUTE) {
8795f904
DS
406 struct interface *ifp;
407 vrf_id_t nh_vrf_id = vrf_id;
408
fd36be7e
DL
409 if (!tb[RTA_MULTIPATH]) {
410 struct nexthop nh;
411 size_t sz = (afi == AFI_IP) ? 4 : 16;
412
413 memset(&nh, 0, sizeof(nh));
af760ec1
DS
414
415 if (bh_type == BLACKHOLE_UNSPEC) {
416 if (index && !gate)
417 nh.type = NEXTHOP_TYPE_IFINDEX;
418 else if (index && gate)
1dca2eaa 419 nh.type = (afi == AFI_IP)
af760ec1
DS
420 ? NEXTHOP_TYPE_IPV4_IFINDEX
421 : NEXTHOP_TYPE_IPV6_IFINDEX;
422 else if (!index && gate)
1dca2eaa
RW
423 nh.type = (afi == AFI_IP)
424 ? NEXTHOP_TYPE_IPV4
425 : NEXTHOP_TYPE_IPV6;
af760ec1
DS
426 else {
427 nh.type = NEXTHOP_TYPE_BLACKHOLE;
428 nh.bh_type = bh_type;
429 }
430 } else {
fd36be7e 431 nh.type = NEXTHOP_TYPE_BLACKHOLE;
e655a03c
DL
432 nh.bh_type = bh_type;
433 }
fd36be7e
DL
434 nh.ifindex = index;
435 if (prefsrc)
436 memcpy(&nh.src, prefsrc, sz);
437 if (gate)
438 memcpy(&nh.gate, gate, sz);
915902cb 439
8795f904
DS
440 if (index) {
441 ifp = if_lookup_by_index(index,
442 VRF_UNKNOWN);
443 if (ifp)
444 nh_vrf_id = ifp->vrf_id;
445 }
4a7371e9 446 nh.vrf_id = nh_vrf_id;
8795f904 447
4a7371e9
DS
448 rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
449 NULL, &nh, table, metric, mtu, distance, tag);
fd36be7e 450 } else {
d62a17ae 451 /* This is a multipath route */
452
453 struct route_entry *re;
454 struct rtnexthop *rtnh =
455 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
456
457 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
458
459 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
915902cb 460 re->type = proto;
25715c7e 461 re->distance = distance;
d62a17ae 462 re->flags = flags;
463 re->metric = metric;
464 re->mtu = mtu;
465 re->vrf_id = vrf_id;
466 re->table = table;
467 re->nexthop_num = 0;
468 re->uptime = time(NULL);
4e40b6d6 469 re->tag = tag;
d62a17ae 470
471 for (;;) {
4a7371e9 472 vrf_id_t nh_vrf_id;
d62a17ae 473 if (len < (int)sizeof(*rtnh)
474 || rtnh->rtnh_len > len)
475 break;
476
477 index = rtnh->rtnh_ifindex;
8795f904
DS
478 if (index) {
479 /*
480 * Yes we are looking this up
481 * for every nexthop and just
482 * using the last one looked
483 * up right now
484 */
485 ifp = if_lookup_by_index(index,
486 VRF_UNKNOWN);
487 if (ifp)
4a7371e9
DS
488 nh_vrf_id = ifp->vrf_id;
489 else {
490 zlog_warn(
491 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
492 __PRETTY_FUNCTION__,
493 index);
494 nh_vrf_id = VRF_DEFAULT;
495 }
496 } else
497 nh_vrf_id = vrf_id;
498
d62a17ae 499 gate = 0;
500 if (rtnh->rtnh_len > sizeof(*rtnh)) {
501 memset(tb, 0, sizeof(tb));
502 netlink_parse_rtattr(
503 tb, RTA_MAX, RTNH_DATA(rtnh),
504 rtnh->rtnh_len - sizeof(*rtnh));
505 if (tb[RTA_GATEWAY])
506 gate = RTA_DATA(
507 tb[RTA_GATEWAY]);
508 }
509
510 if (gate) {
511 if (rtm->rtm_family == AF_INET) {
512 if (index)
513 route_entry_nexthop_ipv4_ifindex_add(
514 re, gate,
4a7371e9
DS
515 prefsrc, index,
516 nh_vrf_id);
d62a17ae 517 else
518 route_entry_nexthop_ipv4_add(
519 re, gate,
4a7371e9
DS
520 prefsrc,
521 nh_vrf_id);
d62a17ae 522 } else if (rtm->rtm_family
523 == AF_INET6) {
524 if (index)
525 route_entry_nexthop_ipv6_ifindex_add(
4a7371e9
DS
526 re, gate, index,
527 nh_vrf_id);
d62a17ae 528 else
529 route_entry_nexthop_ipv6_add(
4a7371e9
DS
530 re, gate,
531 nh_vrf_id);
d62a17ae 532 }
533 } else
4a7371e9
DS
534 route_entry_nexthop_ifindex_add(
535 re, index, nh_vrf_id);
d62a17ae 536
537 len -= NLMSG_ALIGN(rtnh->rtnh_len);
538 rtnh = RTNH_NEXT(rtnh);
539 }
540
541 zserv_nexthop_num_warn(__func__,
542 (const struct prefix *)&p,
543 re->nexthop_num);
544 if (re->nexthop_num == 0)
545 XFREE(MTYPE_RE, re);
546 else
7990990e 547 rib_add_multipath(afi, SAFI_UNICAST, &p,
d62a17ae 548 NULL, re);
549 }
550 } else {
fd36be7e
DL
551 if (!tb[RTA_MULTIPATH]) {
552 struct nexthop nh;
553 size_t sz = (afi == AFI_IP) ? 4 : 16;
554
555 memset(&nh, 0, sizeof(nh));
8ba5bd58
RW
556 if (bh_type == BLACKHOLE_UNSPEC) {
557 if (index && !gate)
558 nh.type = NEXTHOP_TYPE_IFINDEX;
559 else if (index && gate)
560 nh.type =
561 (afi == AFI_IP)
562 ? NEXTHOP_TYPE_IPV4_IFINDEX
563 : NEXTHOP_TYPE_IPV6_IFINDEX;
564 else if (!index && gate)
565 nh.type = (afi == AFI_IP)
566 ? NEXTHOP_TYPE_IPV4
60466a63 567 : NEXTHOP_TYPE_IPV6;
8ba5bd58
RW
568 else {
569 nh.type = NEXTHOP_TYPE_BLACKHOLE;
570 nh.bh_type = BLACKHOLE_UNSPEC;
571 }
572 } else {
fd36be7e 573 nh.type = NEXTHOP_TYPE_BLACKHOLE;
8ba5bd58
RW
574 nh.bh_type = bh_type;
575 }
fd36be7e
DL
576 nh.ifindex = index;
577 if (gate)
578 memcpy(&nh.gate, gate, sz);
d62a17ae 579 rib_delete(afi, SAFI_UNICAST, vrf_id,
915902cb 580 proto, 0, flags, &p, NULL, &nh,
6134fd82 581 table, metric, true, NULL);
fd36be7e
DL
582 } else {
583 /* XXX: need to compare the entire list of nexthops
584 * here for NLM_F_APPEND stupidity */
585 rib_delete(afi, SAFI_UNICAST, vrf_id,
915902cb 586 proto, 0, flags, &p, NULL, NULL,
6134fd82 587 table, metric, true, NULL);
d62a17ae 588 }
589 }
590
591 return 0;
718e3744 592}
593
e3be0432
DS
594static struct mcast_route_data *mroute = NULL;
595
d62a17ae 596static int netlink_route_change_read_multicast(struct sockaddr_nl *snl,
597 struct nlmsghdr *h,
598 ns_id_t ns_id, int startup)
565fdc75 599{
d62a17ae 600 int len;
601 struct rtmsg *rtm;
602 struct rtattr *tb[RTA_MAX + 1];
603 struct mcast_route_data *m;
604 struct mcast_route_data mr;
605 int iif = 0;
606 int count;
607 int oif[256];
608 int oif_count = 0;
609 char sbuf[40];
610 char gbuf[40];
611 char oif_list[256] = "\0";
612 vrf_id_t vrf = ns_id;
43b5cc5e 613 int table;
d62a17ae 614
615 if (mroute)
616 m = mroute;
617 else {
618 memset(&mr, 0, sizeof(mr));
619 m = &mr;
620 }
621
622 rtm = NLMSG_DATA(h);
623
624 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
625
626 memset(tb, 0, sizeof tb);
627 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
90d82769 628
43b5cc5e
DS
629 if (tb[RTA_TABLE])
630 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
631 else
632 table = rtm->rtm_table;
633
634 vrf = vrf_lookup_by_table(table);
635
d62a17ae 636 if (tb[RTA_IIF])
637 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
638
639 if (tb[RTA_SRC])
bd8b9272 640 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
d62a17ae 641
642 if (tb[RTA_DST])
bd8b9272 643 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
d62a17ae 644
645 if ((RTA_EXPIRES <= RTA_MAX) && tb[RTA_EXPIRES])
646 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
647
648 if (tb[RTA_MULTIPATH]) {
649 struct rtnexthop *rtnh =
650 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
651
652 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
653 for (;;) {
654 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
655 break;
656
657 oif[oif_count] = rtnh->rtnh_ifindex;
658 oif_count++;
659
660 len -= NLMSG_ALIGN(rtnh->rtnh_len);
661 rtnh = RTNH_NEXT(rtnh);
662 }
663 }
664
665 if (IS_ZEBRA_DEBUG_KERNEL) {
666 struct interface *ifp;
0af35d90
RW
667 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
668 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
d62a17ae 669 for (count = 0; count < oif_count; count++) {
670 ifp = if_lookup_by_index(oif[count], vrf);
671 char temp[256];
672
673 sprintf(temp, "%s ", ifp->name);
674 strcat(oif_list, temp);
675 }
43b5cc5e 676 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vrf);
d62a17ae 677 ifp = if_lookup_by_index(iif, vrf);
43b5cc5e
DS
678 zlog_debug(
679 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s OIF: %s jiffies: %lld",
680 zvrf->vrf->name, vrf, nl_msg_type_to_str(h->nlmsg_type),
681 sbuf, gbuf, ifp->name, oif_list, m->lastused);
90d82769 682 }
d62a17ae 683 return 0;
565fdc75
DS
684}
685
d62a17ae 686int netlink_route_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
687 ns_id_t ns_id, int startup)
565fdc75 688{
d62a17ae 689 int len;
690 vrf_id_t vrf_id = ns_id;
691 struct rtmsg *rtm;
692
693 rtm = NLMSG_DATA(h);
694
695 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
696 /* If this is not route add/delete message print warning. */
697 zlog_warn("Kernel message: %d vrf %u\n", h->nlmsg_type, vrf_id);
698 return 0;
699 }
700
701 /* Connected route. */
702 if (IS_ZEBRA_DEBUG_KERNEL)
703 zlog_debug("%s %s %s proto %s vrf %u",
704 nl_msg_type_to_str(h->nlmsg_type),
705 nl_family_to_str(rtm->rtm_family),
706 nl_rttype_to_str(rtm->rtm_type),
707 nl_rtproto_to_str(rtm->rtm_protocol), vrf_id);
708
709 /* We don't care about change notifications for the MPLS table. */
710 /* TODO: Revisit this. */
711 if (rtm->rtm_family == AF_MPLS)
712 return 0;
713
714 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
715 if (len < 0)
716 return -1;
717
e655a03c 718 if (rtm->rtm_type == RTN_MULTICAST)
d62a17ae 719 netlink_route_change_read_multicast(snl, h, ns_id, startup);
e655a03c
DL
720 else
721 netlink_route_change_read_unicast(snl, h, ns_id, startup);
d62a17ae 722 return 0;
565fdc75
DS
723}
724
289602d7 725/* Request for specific route information from the kernel */
d62a17ae 726static int netlink_request_route(struct zebra_ns *zns, int family, int type)
289602d7 727{
d62a17ae 728 struct {
729 struct nlmsghdr n;
730 struct rtmsg rtm;
731 } req;
732
733 /* Form the request, specifying filter (rtattr) if needed. */
734 memset(&req, 0, sizeof(req));
735 req.n.nlmsg_type = type;
736 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
737 req.rtm.rtm_family = family;
738
739 return netlink_request(&zns->netlink_cmd, &req.n);
289602d7 740}
741
718e3744 742/* Routing table read function using netlink interface. Only called
743 bootstrap time. */
d62a17ae 744int netlink_route_read(struct zebra_ns *zns)
718e3744 745{
d62a17ae 746 int ret;
747
748 /* Get IPv4 routing table. */
749 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
750 if (ret < 0)
751 return ret;
752 ret = netlink_parse_info(netlink_route_change_read_unicast,
753 &zns->netlink_cmd, zns, 0, 1);
754 if (ret < 0)
755 return ret;
756
757 /* Get IPv6 routing table. */
758 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
759 if (ret < 0)
760 return ret;
761 ret = netlink_parse_info(netlink_route_change_read_unicast,
762 &zns->netlink_cmd, zns, 0, 1);
763 if (ret < 0)
764 return ret;
765
766 return 0;
718e3744 767}
768
d62a17ae 769static void _netlink_route_nl_add_gateway_info(u_char route_family,
770 u_char gw_family,
771 struct nlmsghdr *nlmsg,
772 size_t req_size, int bytelen,
773 struct nexthop *nexthop)
40c7bdb0 774{
d62a17ae 775 if (route_family == AF_MPLS) {
776 struct gw_family_t gw_fam;
777
778 gw_fam.family = gw_family;
779 if (gw_family == AF_INET)
780 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
781 else
782 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
783 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
784 bytelen + 2);
785 } else {
786 if (gw_family == AF_INET)
787 addattr_l(nlmsg, req_size, RTA_GATEWAY,
788 &nexthop->gate.ipv4, bytelen);
789 else
790 addattr_l(nlmsg, req_size, RTA_GATEWAY,
791 &nexthop->gate.ipv6, bytelen);
792 }
40c7bdb0 793}
794
d62a17ae 795static void _netlink_route_rta_add_gateway_info(u_char route_family,
796 u_char gw_family,
797 struct rtattr *rta,
798 struct rtnexthop *rtnh,
799 size_t req_size, int bytelen,
800 struct nexthop *nexthop)
40c7bdb0 801{
d62a17ae 802 if (route_family == AF_MPLS) {
803 struct gw_family_t gw_fam;
804
805 gw_fam.family = gw_family;
806 if (gw_family == AF_INET)
807 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
808 else
809 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
810 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
811 bytelen + 2);
812 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
813 } else {
814 if (gw_family == AF_INET)
815 rta_addattr_l(rta, req_size, RTA_GATEWAY,
816 &nexthop->gate.ipv4, bytelen);
817 else
818 rta_addattr_l(rta, req_size, RTA_GATEWAY,
819 &nexthop->gate.ipv6, bytelen);
820 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
821 }
40c7bdb0 822}
823
fa713d9e
CF
824/* This function takes a nexthop as argument and adds
825 * the appropriate netlink attributes to an existing
826 * netlink message.
827 *
828 * @param routedesc: Human readable description of route type
829 * (direct/recursive, single-/multipath)
830 * @param bytelen: Length of addresses in bytes.
831 * @param nexthop: Nexthop information
832 * @param nlmsg: nlmsghdr structure to fill in.
833 * @param req_size: The size allocated for the message.
834 */
d62a17ae 835static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
836 struct nexthop *nexthop,
837 struct nlmsghdr *nlmsg,
838 struct rtmsg *rtmsg,
839 size_t req_size, int cmd)
fa713d9e 840{
8ecdb26e 841 struct mpls_label_stack *nh_label;
d62a17ae 842 mpls_lse_t out_lse[MPLS_MAX_LABELS];
9a62e84b 843 char label_buf[256];
d62a17ae 844
845 /*
846 * label_buf is *only* currently used within debugging.
847 * As such when we assign it we are guarding it inside
848 * a debug test. If you want to change this make sure
849 * you fix this assumption
850 */
851 label_buf[0] = '\0';
852 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
853 * (in the case of LER)
854 */
855 nh_label = nexthop->nh_label;
856 if (rtmsg->rtm_family == AF_MPLS) {
857 assert(nh_label);
858 assert(nh_label->num_labels == 1);
859 }
860
861 if (nh_label && nh_label->num_labels) {
862 int i, num_labels = 0;
863 u_int32_t bos;
864 char label_buf1[20];
865
866 for (i = 0; i < nh_label->num_labels; i++) {
70e98a7f 867 if (nh_label->label[i] != MPLS_LABEL_IMPLICIT_NULL) {
d62a17ae 868 bos = ((i == (nh_label->num_labels - 1)) ? 1
869 : 0);
870 out_lse[i] = mpls_lse_encode(nh_label->label[i],
871 0, 0, bos);
872 if (IS_ZEBRA_DEBUG_KERNEL) {
873 if (!num_labels)
9a62e84b 874 sprintf(label_buf, "label %u",
d62a17ae 875 nh_label->label[i]);
876 else {
9a62e84b 877 sprintf(label_buf1, "/%u",
d62a17ae 878 nh_label->label[i]);
9a62e84b
RW
879 strlcat(label_buf, label_buf1,
880 sizeof(label_buf));
d62a17ae 881 }
882 }
883 num_labels++;
884 }
885 }
886 if (num_labels) {
887 if (rtmsg->rtm_family == AF_MPLS)
888 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
889 num_labels * sizeof(mpls_lse_t));
890 else {
891 struct rtattr *nest;
892 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
893
894 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE,
895 &encap, sizeof(u_int16_t));
896 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
897 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST,
898 &out_lse,
899 num_labels * sizeof(mpls_lse_t));
900 addattr_nest_end(nlmsg, nest);
901 }
66d42727 902 }
0aabccc0 903 }
fa713d9e 904
d62a17ae 905 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
906 rtmsg->rtm_flags |= RTNH_F_ONLINK;
907
908 if (rtmsg->rtm_family == AF_INET
909 && (nexthop->type == NEXTHOP_TYPE_IPV6
910 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
911 rtmsg->rtm_flags |= RTNH_F_ONLINK;
912 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
913 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
914
915 if (nexthop->rmap_src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
916 addattr_l(nlmsg, req_size, RTA_PREFSRC,
917 &nexthop->rmap_src.ipv4, bytelen);
918 else if (nexthop->src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
919 addattr_l(nlmsg, req_size, RTA_PREFSRC,
920 &nexthop->src.ipv4, bytelen);
921
922 if (IS_ZEBRA_DEBUG_KERNEL)
923 zlog_debug(
924 " 5549: _netlink_route_build_singlepath() (%s): "
925 "nexthop via %s %s if %u",
926 routedesc, ipv4_ll_buf, label_buf,
927 nexthop->ifindex);
928 return;
0aabccc0
DD
929 }
930
d62a17ae 931 if (nexthop->type == NEXTHOP_TYPE_IPV4
932 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
933 /* Send deletes to the kernel without specifying the next-hop */
934 if (cmd != RTM_DELROUTE)
935 _netlink_route_nl_add_gateway_info(
936 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
937 bytelen, nexthop);
938
939 if (cmd == RTM_NEWROUTE) {
940 if (nexthop->rmap_src.ipv4.s_addr)
941 addattr_l(nlmsg, req_size, RTA_PREFSRC,
942 &nexthop->rmap_src.ipv4, bytelen);
943 else if (nexthop->src.ipv4.s_addr)
944 addattr_l(nlmsg, req_size, RTA_PREFSRC,
945 &nexthop->src.ipv4, bytelen);
946 }
947
948 if (IS_ZEBRA_DEBUG_KERNEL)
949 zlog_debug(
950 "netlink_route_multipath() (%s): "
951 "nexthop via %s %s if %u",
952 routedesc, inet_ntoa(nexthop->gate.ipv4),
953 label_buf, nexthop->ifindex);
0aabccc0 954 }
fa713d9e 955
d62a17ae 956 if (nexthop->type == NEXTHOP_TYPE_IPV6
957 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
958 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
959 nlmsg, req_size, bytelen,
960 nexthop);
961
962 if (cmd == RTM_NEWROUTE) {
963 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
964 addattr_l(nlmsg, req_size, RTA_PREFSRC,
965 &nexthop->rmap_src.ipv6, bytelen);
966 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
967 addattr_l(nlmsg, req_size, RTA_PREFSRC,
968 &nexthop->src.ipv6, bytelen);
969 }
fa713d9e 970
d62a17ae 971 if (IS_ZEBRA_DEBUG_KERNEL)
972 zlog_debug(
973 "netlink_route_multipath() (%s): "
974 "nexthop via %s %s if %u",
975 routedesc, inet6_ntoa(nexthop->gate.ipv6),
976 label_buf, nexthop->ifindex);
977 }
5e210522
DS
978
979 /*
980 * We have the ifindex so we should always send it
981 * This is especially useful if we are doing route
982 * leaking.
983 */
984 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
d62a17ae 985 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
986
5e210522
DS
987 if (nexthop->type == NEXTHOP_TYPE_IFINDEX
988 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
d62a17ae 989 if (cmd == RTM_NEWROUTE) {
990 if (nexthop->rmap_src.ipv4.s_addr)
991 addattr_l(nlmsg, req_size, RTA_PREFSRC,
992 &nexthop->rmap_src.ipv4, bytelen);
993 else if (nexthop->src.ipv4.s_addr)
994 addattr_l(nlmsg, req_size, RTA_PREFSRC,
995 &nexthop->src.ipv4, bytelen);
996 }
fa713d9e 997
d62a17ae 998 if (IS_ZEBRA_DEBUG_KERNEL)
999 zlog_debug(
1000 "netlink_route_multipath() (%s): "
1001 "nexthop via if %u",
1002 routedesc, nexthop->ifindex);
0aabccc0
DD
1003 }
1004
d62a17ae 1005 if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
d62a17ae 1006 if (cmd == RTM_NEWROUTE) {
1007 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1008 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1009 &nexthop->rmap_src.ipv6, bytelen);
1010 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1011 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1012 &nexthop->src.ipv6, bytelen);
1013 }
1014
1015 if (IS_ZEBRA_DEBUG_KERNEL)
1016 zlog_debug(
1017 "netlink_route_multipath() (%s): "
1018 "nexthop via if %u",
1019 routedesc, nexthop->ifindex);
1020 }
fa713d9e
CF
1021}
1022
1023/* This function takes a nexthop as argument and
1024 * appends to the given rtattr/rtnexthop pair the
1025 * representation of the nexthop. If the nexthop
1026 * defines a preferred source, the src parameter
1027 * will be modified to point to that src, otherwise
1028 * it will be kept unmodified.
1029 *
1030 * @param routedesc: Human readable description of route type
1031 * (direct/recursive, single-/multipath)
1032 * @param bytelen: Length of addresses in bytes.
1033 * @param nexthop: Nexthop information
1034 * @param rta: rtnetlink attribute structure
1035 * @param rtnh: pointer to an rtnetlink nexthop structure
1036 * @param src: pointer pointing to a location where
1037 * the prefsrc should be stored.
1038 */
d62a17ae 1039static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
1040 struct nexthop *nexthop,
1041 struct rtattr *rta,
1042 struct rtnexthop *rtnh,
1043 struct rtmsg *rtmsg,
1044 union g_addr **src)
fa713d9e 1045{
8ecdb26e 1046 struct mpls_label_stack *nh_label;
d62a17ae 1047 mpls_lse_t out_lse[MPLS_MAX_LABELS];
9a62e84b 1048 char label_buf[256];
d62a17ae 1049
1050 rtnh->rtnh_len = sizeof(*rtnh);
1051 rtnh->rtnh_flags = 0;
1052 rtnh->rtnh_hops = 0;
1053 rta->rta_len += rtnh->rtnh_len;
1054
1055 /*
1056 * label_buf is *only* currently used within debugging.
1057 * As such when we assign it we are guarding it inside
1058 * a debug test. If you want to change this make sure
1059 * you fix this assumption
1060 */
1061 label_buf[0] = '\0';
1062 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
1063 * (in the case of LER)
1064 */
1065 nh_label = nexthop->nh_label;
1066 if (rtmsg->rtm_family == AF_MPLS) {
1067 assert(nh_label);
1068 assert(nh_label->num_labels == 1);
1069 }
1070
1071 if (nh_label && nh_label->num_labels) {
1072 int i, num_labels = 0;
1073 u_int32_t bos;
1074 char label_buf1[20];
1075
1076 for (i = 0; i < nh_label->num_labels; i++) {
70e98a7f 1077 if (nh_label->label[i] != MPLS_LABEL_IMPLICIT_NULL) {
d62a17ae 1078 bos = ((i == (nh_label->num_labels - 1)) ? 1
1079 : 0);
1080 out_lse[i] = mpls_lse_encode(nh_label->label[i],
1081 0, 0, bos);
1082 if (IS_ZEBRA_DEBUG_KERNEL) {
1083 if (!num_labels)
9a62e84b 1084 sprintf(label_buf, "label %u",
d62a17ae 1085 nh_label->label[i]);
1086 else {
9a62e84b 1087 sprintf(label_buf1, "/%u",
d62a17ae 1088 nh_label->label[i]);
9a62e84b
RW
1089 strlcat(label_buf, label_buf1,
1090 sizeof(label_buf));
d62a17ae 1091 }
1092 }
1093 num_labels++;
1094 }
1095 }
1096 if (num_labels) {
1097 if (rtmsg->rtm_family == AF_MPLS) {
1098 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1099 &out_lse,
1100 num_labels * sizeof(mpls_lse_t));
1101 rtnh->rtnh_len += RTA_LENGTH(
1102 num_labels * sizeof(mpls_lse_t));
1103 } else {
1104 struct rtattr *nest;
1105 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
1106 int len = rta->rta_len;
1107
1108 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1109 RTA_ENCAP_TYPE, &encap,
1110 sizeof(u_int16_t));
1111 nest = rta_nest(rta, NL_PKT_BUF_SIZE,
1112 RTA_ENCAP);
1113 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1114 MPLS_IPTUNNEL_DST, &out_lse,
1115 num_labels * sizeof(mpls_lse_t));
1116 rta_nest_end(rta, nest);
1117 rtnh->rtnh_len += rta->rta_len - len;
1118 }
66d42727 1119 }
d62a17ae 1120 }
1121
1122 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1123 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1124
1125 if (rtmsg->rtm_family == AF_INET
1126 && (nexthop->type == NEXTHOP_TYPE_IPV6
1127 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1128 bytelen = 4;
1129 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1130 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1131 bytelen);
1132 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1133 rtnh->rtnh_ifindex = nexthop->ifindex;
1134
1135 if (nexthop->rmap_src.ipv4.s_addr)
1136 *src = &nexthop->rmap_src;
1137 else if (nexthop->src.ipv4.s_addr)
1138 *src = &nexthop->src;
1139
1140 if (IS_ZEBRA_DEBUG_KERNEL)
1141 zlog_debug(
1142 " 5549: netlink_route_build_multipath() (%s): "
1143 "nexthop via %s %s if %u",
1144 routedesc, ipv4_ll_buf, label_buf,
1145 nexthop->ifindex);
1146 return;
1147 }
1148
1149 if (nexthop->type == NEXTHOP_TYPE_IPV4
1150 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1151 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1152 rta, rtnh, NL_PKT_BUF_SIZE,
1153 bytelen, nexthop);
1154 if (nexthop->rmap_src.ipv4.s_addr)
1155 *src = &nexthop->rmap_src;
1156 else if (nexthop->src.ipv4.s_addr)
1157 *src = &nexthop->src;
1158
1159 if (IS_ZEBRA_DEBUG_KERNEL)
1160 zlog_debug(
1161 "netlink_route_multipath() (%s): "
1162 "nexthop via %s %s if %u",
1163 routedesc, inet_ntoa(nexthop->gate.ipv4),
1164 label_buf, nexthop->ifindex);
1165 }
1166 if (nexthop->type == NEXTHOP_TYPE_IPV6
1167 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1168 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1169 rta, rtnh, NL_PKT_BUF_SIZE,
1170 bytelen, nexthop);
1171
1172 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1173 *src = &nexthop->rmap_src;
1174 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1175 *src = &nexthop->src;
1176
1177 if (IS_ZEBRA_DEBUG_KERNEL)
1178 zlog_debug(
1179 "netlink_route_multipath() (%s): "
1180 "nexthop via %s %s if %u",
1181 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1182 label_buf, nexthop->ifindex);
1183 }
5e210522
DS
1184
1185 /*
1186 * We have figured out the ifindex so we should always send it
1187 * This is especially useful if we are doing route
1188 * leaking.
1189 */
1190 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1191 rtnh->rtnh_ifindex = nexthop->ifindex;
1192
d62a17ae 1193 /* ifindex */
1194 if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
1195 || nexthop->type == NEXTHOP_TYPE_IFINDEX) {
d62a17ae 1196 if (nexthop->rmap_src.ipv4.s_addr)
1197 *src = &nexthop->rmap_src;
1198 else if (nexthop->src.ipv4.s_addr)
1199 *src = &nexthop->src;
1200
1201 if (IS_ZEBRA_DEBUG_KERNEL)
1202 zlog_debug(
1203 "netlink_route_multipath() (%s): "
1204 "nexthop via if %u",
1205 routedesc, nexthop->ifindex);
1206 } else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
d62a17ae 1207 if (IS_ZEBRA_DEBUG_KERNEL)
1208 zlog_debug(
1209 "netlink_route_multipath() (%s): "
1210 "nexthop via if %u",
1211 routedesc, nexthop->ifindex);
1212 } else {
1213 rtnh->rtnh_ifindex = 0;
1214 }
fa713d9e
CF
1215}
1216
d62a17ae 1217static inline void _netlink_mpls_build_singlepath(const char *routedesc,
1218 zebra_nhlfe_t *nhlfe,
1219 struct nlmsghdr *nlmsg,
1220 struct rtmsg *rtmsg,
1221 size_t req_size, int cmd)
40c7bdb0 1222{
d62a17ae 1223 int bytelen;
1224 u_char family;
40c7bdb0 1225
d62a17ae 1226 family = NHLFE_FAMILY(nhlfe);
1227 bytelen = (family == AF_INET ? 4 : 16);
1228 _netlink_route_build_singlepath(routedesc, bytelen, nhlfe->nexthop,
1229 nlmsg, rtmsg, req_size, cmd);
40c7bdb0 1230}
1231
1232
1233static inline void
d62a17ae 1234_netlink_mpls_build_multipath(const char *routedesc, zebra_nhlfe_t *nhlfe,
1235 struct rtattr *rta, struct rtnexthop *rtnh,
1236 struct rtmsg *rtmsg, union g_addr **src)
40c7bdb0 1237{
d62a17ae 1238 int bytelen;
1239 u_char family;
40c7bdb0 1240
d62a17ae 1241 family = NHLFE_FAMILY(nhlfe);
1242 bytelen = (family == AF_INET ? 4 : 16);
1243 _netlink_route_build_multipath(routedesc, bytelen, nhlfe->nexthop, rta,
1244 rtnh, rtmsg, src);
40c7bdb0 1245}
1246
1247
fa713d9e
CF
1248/* Log debug information for netlink_route_multipath
1249 * if debug logging is enabled.
1250 *
1251 * @param cmd: Netlink command which is to be processed
1252 * @param p: Prefix for which the change is due
1253 * @param nexthop: Nexthop which is currently processed
1254 * @param routedesc: Semantic annotation for nexthop
1255 * (recursive, multipath, etc.)
1256 * @param family: Address family which the change concerns
1257 */
d62a17ae 1258static void _netlink_route_debug(int cmd, struct prefix *p,
1259 struct nexthop *nexthop, const char *routedesc,
1260 int family, struct zebra_vrf *zvrf)
fa713d9e 1261{
d62a17ae 1262 if (IS_ZEBRA_DEBUG_KERNEL) {
1263 char buf[PREFIX_STRLEN];
1264 zlog_debug(
1265 "netlink_route_multipath() (%s): %s %s vrf %u type %s",
1266 routedesc, nl_msg_type_to_str(cmd),
1267 prefix2str(p, buf, sizeof(buf)), zvrf_id(zvrf),
1268 (nexthop) ? nexthop_type_to_str(nexthop->type) : "UNK");
1269 }
1270}
1271
1272static void _netlink_mpls_debug(int cmd, u_int32_t label, const char *routedesc)
40c7bdb0 1273{
d62a17ae 1274 if (IS_ZEBRA_DEBUG_KERNEL)
1275 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1276 nl_msg_type_to_str(cmd), label);
fa713d9e
CF
1277}
1278
d62a17ae 1279static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
1280 int llalen)
5c610faf 1281{
d62a17ae 1282 struct {
1283 struct nlmsghdr n;
1284 struct ndmsg ndm;
1285 char buf[256];
1286 } req;
5c610faf 1287
d62a17ae 1288 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
8f7d9fc0 1289
d62a17ae 1290 memset(&req.n, 0, sizeof(req.n));
1291 memset(&req.ndm, 0, sizeof(req.ndm));
5c610faf 1292
d62a17ae 1293 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1294 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1295 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1296 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
a55ba23f 1297
d62a17ae 1298 req.ndm.ndm_family = AF_INET;
1299 req.ndm.ndm_state = NUD_PERMANENT;
1300 req.ndm.ndm_ifindex = ifindex;
1301 req.ndm.ndm_type = RTN_UNICAST;
5c610faf 1302
d62a17ae 1303 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1304 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
5c610faf 1305
d62a17ae 1306 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1307 0);
5c610faf
DS
1308}
1309
718e3744 1310/* Routing table change via netlink interface. */
6ae24471 1311/* Update flag indicates whether this is a "replace" or not. */
d62a17ae 1312static int netlink_route_multipath(int cmd, struct prefix *p,
1313 struct prefix *src_p, struct route_entry *re,
1314 int update)
718e3744 1315{
d62a17ae 1316 int bytelen;
1317 struct sockaddr_nl snl;
1318 struct nexthop *nexthop = NULL;
1319 unsigned int nexthop_num;
a8309422 1320 int discard = 0;
d62a17ae 1321 int family = PREFIX_FAMILY(p);
1322 const char *routedesc;
1323 int setsrc = 0;
1324 union g_addr src;
1325
1326 struct {
1327 struct nlmsghdr n;
1328 struct rtmsg r;
1329 char buf[NL_PKT_BUF_SIZE];
1330 } req;
1331
1332 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1333 struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
1334
1335 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
1336
1337 bytelen = (family == AF_INET ? 4 : 16);
1338
1339 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1340 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1341 if ((cmd == RTM_NEWROUTE) && update)
1342 req.n.nlmsg_flags |= NLM_F_REPLACE;
1343 req.n.nlmsg_type = cmd;
1344 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1345
1346 req.r.rtm_family = family;
1347 req.r.rtm_dst_len = p->prefixlen;
1348 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
915902cb 1349 req.r.rtm_protocol = zebra2proto(re->type);
d62a17ae 1350 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
a8309422 1351 req.r.rtm_type = RTN_UNICAST;
d62a17ae 1352
d62a17ae 1353 addattr_l(&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
1354 if (src_p)
1355 addattr_l(&req.n, sizeof req, RTA_SRC, &src_p->u.prefix,
1356 bytelen);
1357
1358 /* Metric. */
1359 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1360 * used
1361 * either by the kernel or by zebra. Its purely for calculating best
1362 * path(s)
1363 * by the routing protocol and for communicating with protocol peers.
1364 */
1365 addattr32(&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
4e40b6d6
KK
1366#if defined(SUPPORT_REALMS)
1367 if (re->tag > 0 && re->tag <= 255)
1368 addattr32(&req.n, sizeof req, RTA_FLOW, re->tag);
1369#endif
d62a17ae 1370 /* Table corresponding to this route. */
1371 if (re->table < 256)
1372 req.r.rtm_table = re->table;
1373 else {
1374 req.r.rtm_table = RT_TABLE_UNSPEC;
1375 addattr32(&req.n, sizeof req, RTA_TABLE, re->table);
0aabccc0 1376 }
718e3744 1377
a8309422
DL
1378 if (discard)
1379 goto skip;
1380
d62a17ae 1381 if (re->mtu || re->nexthop_mtu) {
1382 char buf[NL_PKT_BUF_SIZE];
1383 struct rtattr *rta = (void *)buf;
1384 u_int32_t mtu = re->mtu;
1385 if (!mtu || (re->nexthop_mtu && re->nexthop_mtu < mtu))
1386 mtu = re->nexthop_mtu;
1387 rta->rta_type = RTA_METRICS;
1388 rta->rta_len = RTA_LENGTH(0);
1389 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
1390 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1391 RTA_PAYLOAD(rta));
1392 }
1393
d62a17ae 1394 /* Count overall nexthops so we can decide whether to use singlepath
1395 * or multipath case. */
1396 nexthop_num = 0;
1397 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1398 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1399 continue;
1400 if (cmd == RTM_NEWROUTE
25b9cb0c 1401 && !NEXTHOP_IS_ACTIVE(nexthop->flags))
d62a17ae 1402 continue;
1403 if (cmd == RTM_DELROUTE
1404 && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
1405 continue;
1406
1407 nexthop_num++;
1408 }
1409
1410 /* Singlepath case. */
1411 if (nexthop_num == 1 || multipath_num == 1) {
1412 nexthop_num = 0;
1413 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1f1d24a8
DS
1414 /*
1415 * So we want to cover 2 types of blackhole
1416 * routes here:
1417 * 1) A normal blackhole route( ala from a static
1418 * install.
1419 * 2) A recursively resolved blackhole route
1420 */
1421 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1422 switch (nexthop->bh_type) {
1423 case BLACKHOLE_ADMINPROHIB:
1424 req.r.rtm_type = RTN_PROHIBIT;
1425 break;
1426 case BLACKHOLE_REJECT:
1427 req.r.rtm_type = RTN_UNREACHABLE;
1428 break;
1429 default:
1430 req.r.rtm_type = RTN_BLACKHOLE;
1431 break;
1432 }
1433 goto skip;
1434 }
d62a17ae 1435 if (CHECK_FLAG(nexthop->flags,
1436 NEXTHOP_FLAG_RECURSIVE)) {
1437 if (!setsrc) {
1438 if (family == AF_INET) {
1439 if (nexthop->rmap_src.ipv4
1440 .s_addr
1441 != 0) {
1442 src.ipv4 =
1443 nexthop->rmap_src
1444 .ipv4;
1445 setsrc = 1;
1446 } else if (nexthop->src.ipv4
1447 .s_addr
1448 != 0) {
1449 src.ipv4 =
1450 nexthop->src
1451 .ipv4;
1452 setsrc = 1;
1453 }
1454 } else if (family == AF_INET6) {
1455 if (!IN6_IS_ADDR_UNSPECIFIED(
1456 &nexthop->rmap_src
1457 .ipv6)) {
1458 src.ipv6 =
1459 nexthop->rmap_src
1460 .ipv6;
1461 setsrc = 1;
1462 } else if (
1463 !IN6_IS_ADDR_UNSPECIFIED(
1464 &nexthop->src
1465 .ipv6)) {
1466 src.ipv6 =
1467 nexthop->src
1468 .ipv6;
1469 setsrc = 1;
1470 }
1471 }
1472 }
1473 continue;
1474 }
1475
1476 if ((cmd == RTM_NEWROUTE
25b9cb0c 1477 && NEXTHOP_IS_ACTIVE(nexthop->flags))
d62a17ae 1478 || (cmd == RTM_DELROUTE
1479 && CHECK_FLAG(nexthop->flags,
1480 NEXTHOP_FLAG_FIB))) {
1481 routedesc = nexthop->rparent
8b1450b9
RW
1482 ? "recursive, single-path"
1483 : "single-path";
d62a17ae 1484
1485 _netlink_route_debug(cmd, p, nexthop, routedesc,
1486 family, zvrf);
1487 _netlink_route_build_singlepath(
1488 routedesc, bytelen, nexthop, &req.n,
1489 &req.r, sizeof req, cmd);
1490 nexthop_num++;
1491 break;
1492 }
1493 }
1494 if (setsrc && (cmd == RTM_NEWROUTE)) {
1495 if (family == AF_INET)
1496 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1497 &src.ipv4, bytelen);
1498 else if (family == AF_INET6)
1499 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1500 &src.ipv6, bytelen);
1501 }
1502 } else {
1503 char buf[NL_PKT_BUF_SIZE];
1504 struct rtattr *rta = (void *)buf;
1505 struct rtnexthop *rtnh;
1506 union g_addr *src1 = NULL;
1507
1508 rta->rta_type = RTA_MULTIPATH;
1509 rta->rta_len = RTA_LENGTH(0);
1510 rtnh = RTA_DATA(rta);
1511
1512 nexthop_num = 0;
1513 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1514 if (nexthop_num >= multipath_num)
1515 break;
1516
1517 if (CHECK_FLAG(nexthop->flags,
1518 NEXTHOP_FLAG_RECURSIVE)) {
1519 /* This only works for IPv4 now */
1520 if (!setsrc) {
1521 if (family == AF_INET) {
1522 if (nexthop->rmap_src.ipv4
1523 .s_addr
1524 != 0) {
1525 src.ipv4 =
1526 nexthop->rmap_src
1527 .ipv4;
1528 setsrc = 1;
1529 } else if (nexthop->src.ipv4
1530 .s_addr
1531 != 0) {
1532 src.ipv4 =
1533 nexthop->src
1534 .ipv4;
1535 setsrc = 1;
1536 }
1537 } else if (family == AF_INET6) {
1538 if (!IN6_IS_ADDR_UNSPECIFIED(
1539 &nexthop->rmap_src
1540 .ipv6)) {
1541 src.ipv6 =
1542 nexthop->rmap_src
1543 .ipv6;
1544 setsrc = 1;
1545 } else if (
1546 !IN6_IS_ADDR_UNSPECIFIED(
1547 &nexthop->src
1548 .ipv6)) {
1549 src.ipv6 =
1550 nexthop->src
1551 .ipv6;
1552 setsrc = 1;
1553 }
1554 }
1555 }
1556 continue;
1557 }
1558
1559 if ((cmd == RTM_NEWROUTE
25b9cb0c 1560 && NEXTHOP_IS_ACTIVE(nexthop->flags))
d62a17ae 1561 || (cmd == RTM_DELROUTE
1562 && CHECK_FLAG(nexthop->flags,
1563 NEXTHOP_FLAG_FIB))) {
1564 routedesc = nexthop->rparent
8b1450b9
RW
1565 ? "recursive, multipath"
1566 : "multipath";
d62a17ae 1567 nexthop_num++;
1568
1569 _netlink_route_debug(cmd, p, nexthop, routedesc,
1570 family, zvrf);
1571 _netlink_route_build_multipath(
1572 routedesc, bytelen, nexthop, rta, rtnh,
1573 &req.r, &src1);
1574 rtnh = RTNH_NEXT(rtnh);
1575
1576 if (!setsrc && src1) {
1577 if (family == AF_INET)
1578 src.ipv4 = src1->ipv4;
1579 else if (family == AF_INET6)
1580 src.ipv6 = src1->ipv6;
1581
1582 setsrc = 1;
1583 }
1584 }
1585 }
1586 if (setsrc && (cmd == RTM_NEWROUTE)) {
1587 if (family == AF_INET)
1588 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1589 &src.ipv4, bytelen);
1590 else if (family == AF_INET6)
1591 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1592 &src.ipv6, bytelen);
1593 if (IS_ZEBRA_DEBUG_KERNEL)
1594 zlog_debug("Setting source");
1595 }
1596
1597 if (rta->rta_len > RTA_LENGTH(0))
1598 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1599 RTA_DATA(rta), RTA_PAYLOAD(rta));
1600 }
718e3744 1601
d62a17ae 1602 /* If there is no useful nexthop then return. */
1603 if (nexthop_num == 0) {
1604 if (IS_ZEBRA_DEBUG_KERNEL)
1605 zlog_debug(
1606 "netlink_route_multipath(): No useful nexthop.");
1607 return 0;
1608 }
718e3744 1609
7021c425 1610skip:
718e3744 1611
d62a17ae 1612 /* Destination netlink address. */
1613 memset(&snl, 0, sizeof snl);
1614 snl.nl_family = AF_NETLINK;
718e3744 1615
d62a17ae 1616 /* Talk to netlink socket. */
1617 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1618 0);
718e3744 1619}
1620
43b5cc5e 1621int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
e3be0432 1622{
d62a17ae 1623 int suc = 0;
1624 struct mcast_route_data *mr = (struct mcast_route_data *)in;
bd8b9272
DS
1625 struct {
1626 struct nlmsghdr n;
1627 struct ndmsg ndm;
1628 char buf[256];
1629 } req;
e3be0432 1630
d62a17ae 1631 mroute = mr;
bd8b9272
DS
1632 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1633
1634 memset(&req.n, 0, sizeof(req.n));
1635 memset(&req.ndm, 0, sizeof(req.ndm));
1636
1637 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1638 req.n.nlmsg_flags = NLM_F_REQUEST;
1639 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1640
1641 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1642 req.n.nlmsg_type = RTM_GETROUTE;
1643
1644 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1645 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1646 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1647 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
1648 addattr_l(&req.n, sizeof(req), RTA_TABLE, &zvrf->table_id, 4);
e3be0432 1649
bd8b9272
DS
1650 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1651 &zns->netlink_cmd, zns, 0);
e3be0432 1652
bd8b9272 1653 mroute = NULL;
d62a17ae 1654 return suc;
e3be0432
DS
1655}
1656
7d974ba3
DS
1657void kernel_route_rib(struct route_node *rn, struct prefix *p,
1658 struct prefix *src_p, struct route_entry *old,
1659 struct route_entry *new)
718e3744 1660{
0c555cc6
DS
1661 int ret = 0;
1662
0af35d90
RW
1663 assert(old || new);
1664
0c555cc6
DS
1665 if (new) {
1666 if (p->family == AF_INET)
1667 ret = netlink_route_multipath(RTM_NEWROUTE, p, src_p,
1668 new, (old) ? 1 : 0);
1669 else {
1670 /*
1671 * So v6 route replace semantics are not in
1672 * the kernel at this point as I understand it.
1673 * So let's do a delete than an add.
1674 * In the future once v6 route replace semantics
1675 * are in we can figure out what to do here to
1676 * allow working with old and new kernels.
1677 *
1678 * I'm also intentionally ignoring the failure case
1679 * of the route delete. If that happens yeah we're
1680 * screwed.
1681 */
1682 if (old)
1683 netlink_route_multipath(RTM_DELROUTE, p,
1684 src_p, old, 0);
1685 ret = netlink_route_multipath(RTM_NEWROUTE, p,
1686 src_p, new, 0);
1687 }
7d974ba3 1688 kernel_route_rib_pass_fail(rn, p, new,
0c555cc6
DS
1689 (!ret) ?
1690 SOUTHBOUND_INSTALL_SUCCESS :
1691 SOUTHBOUND_INSTALL_FAILURE);
1692 return;
1693 }
718e3744 1694
0c555cc6
DS
1695 if (old) {
1696 ret = netlink_route_multipath(RTM_DELROUTE, p, src_p, old, 0);
e7fcb843 1697
7d974ba3 1698 kernel_route_rib_pass_fail(rn, p, old,
0c555cc6
DS
1699 (!ret) ?
1700 SOUTHBOUND_DELETE_SUCCESS :
1701 SOUTHBOUND_DELETE_FAILURE);
1702 }
718e3744 1703}
1704
d62a17ae 1705int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
1706 int llalen)
6b8a5694 1707{
d62a17ae 1708 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
1709 addr, lla, llalen);
6b8a5694 1710}
718e3744 1711
13d60d35 1712/*
1713 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
1714 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
1715 */
d62a17ae 1716static int netlink_vxlan_flood_list_update(struct interface *ifp,
1717 struct in_addr *vtep_ip, int cmd)
13d60d35 1718{
d62a17ae 1719 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1720 struct {
1721 struct nlmsghdr n;
1722 struct ndmsg ndm;
1723 char buf[256];
1724 } req;
1725 u_char dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1726
1727 memset(&req.n, 0, sizeof(req.n));
1728 memset(&req.ndm, 0, sizeof(req.ndm));
1729
1730 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1731 req.n.nlmsg_flags = NLM_F_REQUEST;
1732 if (cmd == RTM_NEWNEIGH)
1733 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
1734 req.n.nlmsg_type = cmd;
1735 req.ndm.ndm_family = PF_BRIDGE;
1736 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
1737 req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
1738
1739
1740 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
1741 req.ndm.ndm_ifindex = ifp->ifindex;
1742 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip->s_addr, 4);
1743
1744 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1745 0);
13d60d35 1746}
1747
1748/*
d62a17ae 1749 * Add remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1750 * adding
13d60d35 1751 * a "flood" MAC FDB entry.
1752 */
d62a17ae 1753int kernel_add_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
13d60d35 1754{
d62a17ae 1755 if (IS_ZEBRA_DEBUG_VXLAN)
1756 zlog_debug("Install %s into flood list for VNI %u intf %s(%u)",
1757 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
13d60d35 1758
d62a17ae 1759 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_NEWNEIGH);
13d60d35 1760}
1761
1762/*
1763 * Remove remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1764 * deleting the "flood" MAC FDB entry.
1765 */
d62a17ae 1766int kernel_del_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
13d60d35 1767{
d62a17ae 1768 if (IS_ZEBRA_DEBUG_VXLAN)
1769 zlog_debug(
1770 "Uninstall %s from flood list for VNI %u intf %s(%u)",
1771 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
13d60d35 1772
d62a17ae 1773 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_DELNEIGH);
13d60d35 1774}
1775
/*
 * Fallback for headers that do not provide NDA_RTA: address of the first
 * rtattr that follows the (aligned) struct ndmsg at 'r'.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
1780
d62a17ae 1781static int netlink_macfdb_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
1782 int len)
2232a77c 1783{
d62a17ae 1784 struct ndmsg *ndm;
1785 struct interface *ifp;
1786 struct zebra_if *zif;
d62a17ae 1787 struct rtattr *tb[NDA_MAX + 1];
1788 struct interface *br_if;
1789 struct ethaddr mac;
1790 vlanid_t vid = 0;
1791 struct prefix vtep_ip;
1792 int vid_present = 0, dst_present = 0;
1793 char buf[ETHER_ADDR_STRLEN];
1794 char vid_buf[20];
1795 char dst_buf[30];
1796 u_char sticky = 0;
1797
1798 ndm = NLMSG_DATA(h);
1799
2853fed6 1800 /* We only process macfdb notifications if EVPN is enabled */
1801 if (!is_evpn_enabled())
1802 return 0;
1803
d62a17ae 1804 /* The interface should exist. */
1805 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
1806 ndm->ndm_ifindex);
2853fed6 1807 if (!ifp || !ifp->info)
d62a17ae 1808 return 0;
1809
1810 /* The interface should be something we're interested in. */
1811 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
1812 return 0;
1813
1814 /* Drop "permanent" entries. */
1815 if (ndm->ndm_state & NUD_PERMANENT)
1816 return 0;
1817
1818 zif = (struct zebra_if *)ifp->info;
1819 if ((br_if = zif->brslave_info.br_if) == NULL) {
1820 zlog_warn("%s family %s IF %s(%u) brIF %u - no bridge master",
1821 nl_msg_type_to_str(h->nlmsg_type),
1822 nl_family_to_str(ndm->ndm_family), ifp->name,
1823 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1824 return 0;
1825 }
1826
1827 /* Parse attributes and extract fields of interest. */
1828 memset(tb, 0, sizeof tb);
1829 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
1830
1831 if (!tb[NDA_LLADDR]) {
1832 zlog_warn("%s family %s IF %s(%u) brIF %u - no LLADDR",
1833 nl_msg_type_to_str(h->nlmsg_type),
1834 nl_family_to_str(ndm->ndm_family), ifp->name,
1835 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1836 return 0;
1837 }
1838
ff8b7eb8 1839 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
d62a17ae 1840 zlog_warn(
df0b13cf 1841 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
d62a17ae 1842 nl_msg_type_to_str(h->nlmsg_type),
1843 nl_family_to_str(ndm->ndm_family), ifp->name,
1844 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex,
df0b13cf 1845 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
d62a17ae 1846 return 0;
1847 }
1848
ff8b7eb8 1849 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 1850
1851 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
1852 vid_present = 1;
1853 vid = *(u_int16_t *)RTA_DATA(tb[NDA_VLAN]);
1854 sprintf(vid_buf, " VLAN %u", vid);
1855 }
1856
1857 if (tb[NDA_DST]) {
1858 /* TODO: Only IPv4 supported now. */
1859 dst_present = 1;
1860 vtep_ip.family = AF_INET;
1861 vtep_ip.prefixlen = IPV4_MAX_BITLEN;
1862 memcpy(&(vtep_ip.u.prefix4.s_addr), RTA_DATA(tb[NDA_DST]),
1863 IPV4_MAX_BYTELEN);
1864 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip.u.prefix4));
1865 }
1866
1867 sticky = (ndm->ndm_state & NUD_NOARP) ? 1 : 0;
1868
1869 if (IS_ZEBRA_DEBUG_KERNEL)
1870 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
1871 nl_msg_type_to_str(h->nlmsg_type),
1872 nl_family_to_str(ndm->ndm_family), ifp->name,
1873 ndm->ndm_ifindex, vid_present ? vid_buf : "",
1874 sticky ? "sticky " : "",
1875 prefix_mac2str(&mac, buf, sizeof(buf)),
1876 dst_present ? dst_buf : "");
1877
1878 if (filter_vlan && vid != filter_vlan)
1879 return 0;
1880
1881 /* If add or update, do accordingly if learnt on a "local" interface; if
1882 * the notification is over VxLAN, this has to be related to
1883 * multi-homing,
1884 * so perform an implicit delete of any local entry (if it exists).
1885 */
1886 if (h->nlmsg_type == RTM_NEWNEIGH) {
1887 /* Drop "permanent" entries. */
1888 if (ndm->ndm_state & NUD_PERMANENT)
1889 return 0;
1890
1891 if (IS_ZEBRA_IF_VXLAN(ifp))
1892 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
1893 vid);
1894
1895 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
1896 sticky);
1897 }
1898
1899 /* This is a delete notification.
1900 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
1901 * 2. For a MAC over "local" interface, delete the mac
1902 * Note: We will get notifications from both bridge driver and VxLAN
1903 * driver.
1904 * Ignore the notification from VxLan driver as it is also generated
1905 * when mac moves from remote to local.
1906 */
1907 if (dst_present)
1908 return 0;
1909
1910 if (IS_ZEBRA_IF_VXLAN(ifp))
1911 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
1912 vid);
1913
1914 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
2232a77c 1915}
1916
d62a17ae 1917static int netlink_macfdb_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
1918 ns_id_t ns_id, int startup)
2232a77c 1919{
d62a17ae 1920 int len;
1921 struct ndmsg *ndm;
2232a77c 1922
d62a17ae 1923 if (h->nlmsg_type != RTM_NEWNEIGH)
1924 return 0;
2232a77c 1925
d62a17ae 1926 /* Length validity. */
1927 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
1928 if (len < 0)
1929 return -1;
2232a77c 1930
d62a17ae 1931 /* We are interested only in AF_BRIDGE notifications. */
1932 ndm = NLMSG_DATA(h);
1933 if (ndm->ndm_family != AF_BRIDGE)
1934 return 0;
2232a77c 1935
d62a17ae 1936 return netlink_macfdb_change(snl, h, len);
2232a77c 1937}
1938
1939/* Request for MAC FDB information from the kernel */
d62a17ae 1940static int netlink_request_macs(struct zebra_ns *zns, int family, int type,
1941 ifindex_t master_ifindex)
2232a77c 1942{
d62a17ae 1943 struct {
1944 struct nlmsghdr n;
1945 struct ifinfomsg ifm;
1946 char buf[256];
1947 } req;
1948
1949 /* Form the request, specifying filter (rtattr) if needed. */
1950 memset(&req, 0, sizeof(req));
1951 req.n.nlmsg_type = type;
1952 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1953 req.ifm.ifi_family = family;
1954 if (master_ifindex)
1955 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
1956
1957 return netlink_request(&zns->netlink_cmd, &req.n);
2232a77c 1958}
1959
1960/*
1961 * MAC forwarding database read using netlink interface. This is invoked
1962 * at startup.
1963 */
d62a17ae 1964int netlink_macfdb_read(struct zebra_ns *zns)
2232a77c 1965{
d62a17ae 1966 int ret;
1967
1968 /* Get bridge FDB table. */
1969 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH, 0);
1970 if (ret < 0)
1971 return ret;
1972 /* We are reading entire table. */
1973 filter_vlan = 0;
1974 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1975 0, 1);
1976
1977 return ret;
2232a77c 1978}
1979
1980/*
1981 * MAC forwarding database read using netlink interface. This is for a
1982 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
1983 */
d62a17ae 1984int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
1985 struct interface *br_if)
2232a77c 1986{
d62a17ae 1987 struct zebra_if *br_zif;
1988 struct zebra_if *zif;
1989 struct zebra_l2info_vxlan *vxl;
1990 int ret = 0;
1991
1992
1993 /* Save VLAN we're filtering on, if needed. */
1994 br_zif = (struct zebra_if *)br_if->info;
1995 zif = (struct zebra_if *)ifp->info;
1996 vxl = &zif->l2info.vxl;
1997 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
1998 filter_vlan = vxl->access_vlan;
1999
2000 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
2001 */
2002 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH,
2003 br_if->ifindex);
2004 if (ret < 0)
2005 return ret;
2006 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
2007 0, 0);
2008
2009 /* Reset VLAN filter. */
2010 filter_vlan = 0;
2011 return ret;
2232a77c 2012}
2013
d62a17ae 2014static int netlink_macfdb_update(struct interface *ifp, vlanid_t vid,
2015 struct ethaddr *mac, struct in_addr vtep_ip,
2016 int local, int cmd, u_char sticky)
2232a77c 2017{
d62a17ae 2018 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2019 struct {
2020 struct nlmsghdr n;
2021 struct ndmsg ndm;
2022 char buf[256];
2023 } req;
2024 int dst_alen;
2025 struct zebra_if *zif;
2026 struct interface *br_if;
2027 struct zebra_if *br_zif;
2028 char buf[ETHER_ADDR_STRLEN];
2029 int vid_present = 0, dst_present = 0;
2030 char vid_buf[20];
2031 char dst_buf[30];
2032
2033 zif = ifp->info;
2034 if ((br_if = zif->brslave_info.br_if) == NULL) {
2035 zlog_warn("MAC %s on IF %s(%u) - no mapping to bridge",
2036 (cmd == RTM_NEWNEIGH) ? "add" : "del", ifp->name,
2037 ifp->ifindex);
2038 return -1;
2039 }
2040
2041 memset(&req.n, 0, sizeof(req.n));
2042 memset(&req.ndm, 0, sizeof(req.ndm));
2043
2044 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2045 req.n.nlmsg_flags = NLM_F_REQUEST;
2046 if (cmd == RTM_NEWNEIGH)
2047 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2048 req.n.nlmsg_type = cmd;
2049 req.ndm.ndm_family = AF_BRIDGE;
2050 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2051 req.ndm.ndm_state = NUD_REACHABLE;
2052
2053 if (sticky)
2054 req.ndm.ndm_state |= NUD_NOARP;
2055 else
2056 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2057
2058 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2059 req.ndm.ndm_ifindex = ifp->ifindex;
2060 if (!local) {
2061 dst_alen = 4; // TODO: hardcoded
2062 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
2063 dst_present = 1;
2064 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip));
2065 }
2066 br_zif = (struct zebra_if *)br_if->info;
2067 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) {
2068 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2069 vid_present = 1;
2070 sprintf(vid_buf, " VLAN %u", vid);
2071 }
2072 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2073
2074 if (IS_ZEBRA_DEBUG_KERNEL)
2075 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2076 nl_msg_type_to_str(cmd),
2077 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2078 ifp->ifindex, vid_present ? vid_buf : "",
2079 sticky ? "sticky " : "",
2080 prefix_mac2str(mac, buf, sizeof(buf)),
2081 dst_present ? dst_buf : "");
2082
2083 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2084 0);
2232a77c 2085}
2086
/* Union of the neighbor states OR-ed together below. */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_STALE | NUD_DELAY     \
	 | NUD_PROBE)
2232a77c 2090
d62a17ae 2091static int netlink_ipneigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2092 int len)
2232a77c 2093{
d62a17ae 2094 struct ndmsg *ndm;
2095 struct interface *ifp;
2096 struct zebra_if *zif;
d62a17ae 2097 struct rtattr *tb[NDA_MAX + 1];
2098 struct interface *link_if;
2099 struct ethaddr mac;
2100 struct ipaddr ip;
2101 char buf[ETHER_ADDR_STRLEN];
2102 char buf2[INET6_ADDRSTRLEN];
2103 int mac_present = 0;
2104 u_char ext_learned;
2105
2106 ndm = NLMSG_DATA(h);
2107
2853fed6 2108 /* We only process neigh notifications if EVPN is enabled */
2109 if (!is_evpn_enabled())
2110 return 0;
2111
d62a17ae 2112 /* The interface should exist. */
2113 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2114 ndm->ndm_ifindex);
2853fed6 2115 if (!ifp || !ifp->info)
d62a17ae 2116 return 0;
2117
2118 /* Drop "permanent" entries. */
2119 if (ndm->ndm_state & NUD_PERMANENT)
2120 return 0;
2121
2122 zif = (struct zebra_if *)ifp->info;
2123 /* The neighbor is present on an SVI. From this, we locate the
2124 * underlying
2125 * bridge because we're only interested in neighbors on a VxLAN bridge.
2126 * The bridge is located based on the nature of the SVI:
2127 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2128 * interface
2129 * and is linked to the bridge
2130 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2131 * inteface
2132 * itself
2133 */
2134 if (IS_ZEBRA_IF_VLAN(ifp)) {
71349e03
MK
2135 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2136 zif->link_ifindex);
d62a17ae 2137 if (!link_if)
2138 return 0;
2139 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
2140 link_if = ifp;
2141 else
2142 return 0;
2143
2144 /* Parse attributes and extract fields of interest. */
2145 memset(tb, 0, sizeof tb);
2146 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2147
2148 if (!tb[NDA_DST]) {
2149 zlog_warn("%s family %s IF %s(%u) - no DST",
2150 nl_msg_type_to_str(h->nlmsg_type),
2151 nl_family_to_str(ndm->ndm_family), ifp->name,
2152 ndm->ndm_ifindex);
2153 return 0;
2154 }
2155 memset(&mac, 0, sizeof(struct ethaddr));
2156 memset(&ip, 0, sizeof(struct ipaddr));
2157 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
2158 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
2159
2160 if (h->nlmsg_type == RTM_NEWNEIGH) {
2161 if (tb[NDA_LLADDR]) {
ff8b7eb8 2162 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
d62a17ae 2163 zlog_warn(
df0b13cf 2164 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
d62a17ae 2165 nl_msg_type_to_str(h->nlmsg_type),
2166 nl_family_to_str(ndm->ndm_family),
2167 ifp->name, ndm->ndm_ifindex,
df0b13cf 2168 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
d62a17ae 2169 return 0;
2170 }
2171
2172 mac_present = 1;
ff8b7eb8 2173 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 2174 }
2175
2176 ext_learned = (ndm->ndm_flags & NTF_EXT_LEARNED) ? 1 : 0;
2177
2178 if (IS_ZEBRA_DEBUG_KERNEL)
2179 zlog_debug(
2180 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2181 nl_msg_type_to_str(h->nlmsg_type),
2182 nl_family_to_str(ndm->ndm_family), ifp->name,
2183 ndm->ndm_ifindex,
2184 ipaddr2str(&ip, buf2, sizeof(buf2)),
2185 mac_present
2186 ? prefix_mac2str(&mac, buf, sizeof(buf))
2187 : "",
2188 ndm->ndm_state, ndm->ndm_flags);
2189
2190 /* If the neighbor state is valid for use, process as an add or
2191 * update
2192 * else process as a delete. Note that the delete handling may
2193 * result
2194 * in re-adding the neighbor if it is a valid "remote" neighbor.
2195 */
2196 if (ndm->ndm_state & NUD_VALID)
2197 return zebra_vxlan_local_neigh_add_update(
2198 ifp, link_if, &ip, &mac, ndm->ndm_state,
2199 ext_learned);
2200
2201 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2202 }
2203
2204 if (IS_ZEBRA_DEBUG_KERNEL)
2205 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2206 nl_msg_type_to_str(h->nlmsg_type),
2207 nl_family_to_str(ndm->ndm_family), ifp->name,
2208 ndm->ndm_ifindex,
2209 ipaddr2str(&ip, buf2, sizeof(buf2)));
2210
2211 /* Process the delete - it may result in re-adding the neighbor if it is
2212 * a valid "remote" neighbor.
2213 */
2214 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2232a77c 2215}
2216
d62a17ae 2217static int netlink_neigh_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
2218 ns_id_t ns_id, int startup)
2232a77c 2219{
d62a17ae 2220 int len;
2221 struct ndmsg *ndm;
2232a77c 2222
d62a17ae 2223 if (h->nlmsg_type != RTM_NEWNEIGH)
2224 return 0;
2232a77c 2225
d62a17ae 2226 /* Length validity. */
2227 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2228 if (len < 0)
2229 return -1;
2232a77c 2230
d62a17ae 2231 /* We are interested only in AF_INET or AF_INET6 notifications. */
2232 ndm = NLMSG_DATA(h);
2233 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
2234 return 0;
2232a77c 2235
d62a17ae 2236 return netlink_neigh_change(snl, h, len);
2232a77c 2237}
2238
2239/* Request for IP neighbor information from the kernel */
d62a17ae 2240static int netlink_request_neigh(struct zebra_ns *zns, int family, int type,
2241 ifindex_t ifindex)
2232a77c 2242{
d62a17ae 2243 struct {
2244 struct nlmsghdr n;
2245 struct ndmsg ndm;
2246 char buf[256];
2247 } req;
2248
2249 /* Form the request, specifying filter (rtattr) if needed. */
2250 memset(&req, 0, sizeof(req));
2251 req.n.nlmsg_type = type;
2252 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2253 req.ndm.ndm_family = family;
2254 if (ifindex)
2255 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
2256
2257 return netlink_request(&zns->netlink_cmd, &req.n);
2232a77c 2258}
2259
2260/*
2261 * IP Neighbor table read using netlink interface. This is invoked
2262 * at startup.
2263 */
d62a17ae 2264int netlink_neigh_read(struct zebra_ns *zns)
2232a77c 2265{
d62a17ae 2266 int ret;
2232a77c 2267
d62a17ae 2268 /* Get IP neighbor table. */
2269 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH, 0);
2270 if (ret < 0)
2271 return ret;
2272 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2273 1);
2232a77c 2274
d62a17ae 2275 return ret;
2232a77c 2276}
2277
2278/*
2279 * IP Neighbor table read using netlink interface. This is for a specific
2280 * VLAN device.
2281 */
d62a17ae 2282int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2232a77c 2283{
d62a17ae 2284 int ret = 0;
2232a77c 2285
d62a17ae 2286 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH,
2287 vlan_if->ifindex);
2288 if (ret < 0)
2289 return ret;
2290 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2291 0);
2232a77c 2292
d62a17ae 2293 return ret;
2232a77c 2294}
2295
d62a17ae 2296int netlink_neigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2297 ns_id_t ns_id)
2232a77c 2298{
d62a17ae 2299 int len;
2300 struct ndmsg *ndm;
2232a77c 2301
d62a17ae 2302 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
2303 return 0;
2232a77c 2304
d62a17ae 2305 /* Length validity. */
2306 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2307 if (len < 0)
2308 return -1;
2232a77c 2309
d62a17ae 2310 /* Is this a notification for the MAC FDB or IP neighbor table? */
2311 ndm = NLMSG_DATA(h);
2312 if (ndm->ndm_family == AF_BRIDGE)
2313 return netlink_macfdb_change(snl, h, len);
2232a77c 2314
d62a17ae 2315 if (ndm->ndm_type != RTN_UNICAST)
2316 return 0;
2232a77c 2317
d62a17ae 2318 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2319 return netlink_ipneigh_change(snl, h, len);
2232a77c 2320
d62a17ae 2321 return 0;
2232a77c 2322}
2323
d62a17ae 2324static int netlink_neigh_update2(struct interface *ifp, struct ipaddr *ip,
2325 struct ethaddr *mac, u_int32_t flags, int cmd)
2232a77c 2326{
d62a17ae 2327 struct {
2328 struct nlmsghdr n;
2329 struct ndmsg ndm;
2330 char buf[256];
2331 } req;
2332 int ipa_len;
2333
2334 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2335 char buf[INET6_ADDRSTRLEN];
2336 char buf2[ETHER_ADDR_STRLEN];
2337
2338 memset(&req.n, 0, sizeof(req.n));
2339 memset(&req.ndm, 0, sizeof(req.ndm));
2340
2341 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2342 req.n.nlmsg_flags = NLM_F_REQUEST;
2343 if (cmd == RTM_NEWNEIGH)
2344 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2345 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
2346 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
2347 req.ndm.ndm_state = flags;
2348 req.ndm.ndm_ifindex = ifp->ifindex;
2349 req.ndm.ndm_type = RTN_UNICAST;
2350 req.ndm.ndm_flags = NTF_EXT_LEARNED;
2351
2352
2353 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2354 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2355 if (mac)
2356 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2357
2358 if (IS_ZEBRA_DEBUG_KERNEL)
2359 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s",
2360 nl_msg_type_to_str(cmd),
2361 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2362 ifp->ifindex, ipaddr2str(ip, buf, sizeof(buf)),
2363 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
2364 : "null");
2365
2366 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2367 0);
2232a77c 2368}
2369
d62a17ae 2370int kernel_add_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2371 struct in_addr vtep_ip, u_char sticky)
2232a77c 2372{
d62a17ae 2373 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, 0, RTM_NEWNEIGH,
2374 sticky);
2232a77c 2375}
2376
d62a17ae 2377int kernel_del_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2378 struct in_addr vtep_ip, int local)
2232a77c 2379{
d62a17ae 2380 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, local,
2381 RTM_DELNEIGH, 0);
2232a77c 2382}
2383
d62a17ae 2384int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
2385 struct ethaddr *mac)
2232a77c 2386{
d62a17ae 2387 return netlink_neigh_update2(ifp, ip, mac, NUD_REACHABLE, RTM_NEWNEIGH);
2232a77c 2388}
2389
d62a17ae 2390int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip)
2232a77c 2391{
d62a17ae 2392 return netlink_neigh_update2(ifp, ip, NULL, 0, RTM_DELNEIGH);
2232a77c 2393}
2394
40c7bdb0 2395/*
2396 * MPLS label forwarding table change via netlink interface.
2397 */
d62a17ae 2398int netlink_mpls_multipath(int cmd, zebra_lsp_t *lsp)
40c7bdb0 2399{
d62a17ae 2400 mpls_lse_t lse;
2401 zebra_nhlfe_t *nhlfe;
2402 struct nexthop *nexthop = NULL;
2403 unsigned int nexthop_num;
2404 const char *routedesc;
2405 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
805444ce 2406 int route_type;
d62a17ae 2407
2408 struct {
2409 struct nlmsghdr n;
2410 struct rtmsg r;
2411 char buf[NL_PKT_BUF_SIZE];
2412 } req;
2413
2414 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
2415
d62a17ae 2416 /*
2417 * Count # nexthops so we can decide whether to use singlepath
2418 * or multipath case.
2419 */
2420 nexthop_num = 0;
2421 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2422 nexthop = nhlfe->nexthop;
2423 if (!nexthop)
2424 continue;
2425 if (cmd == RTM_NEWROUTE) {
2426 /* Count all selected NHLFEs */
2427 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2428 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2429 nexthop_num++;
2430 } else /* DEL */
2431 {
2432 /* Count all installed NHLFEs */
2433 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
2434 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
2435 nexthop_num++;
2436 }
2437 }
2438
8dc8a4b6 2439 if ((nexthop_num == 0) || (!lsp->best_nhlfe && (cmd != RTM_DELROUTE)))
d62a17ae 2440 return 0;
2441
2442 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2443 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2444 req.n.nlmsg_type = cmd;
2445 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2446
2447 req.r.rtm_family = AF_MPLS;
2448 req.r.rtm_table = RT_TABLE_MAIN;
2449 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
d62a17ae 2450 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
2451 req.r.rtm_type = RTN_UNICAST;
2452
8dc8a4b6 2453 if (cmd == RTM_NEWROUTE) {
d62a17ae 2454 /* We do a replace to handle update. */
2455 req.n.nlmsg_flags |= NLM_F_REPLACE;
2456
8dc8a4b6
DS
2457 /* set the protocol value if installing */
2458 route_type = re_type_from_lsp_type(lsp->best_nhlfe->type);
2459 req.r.rtm_protocol = zebra2proto(route_type);
2460 }
2461
d62a17ae 2462 /* Fill destination */
2463 lse = mpls_lse_encode(lsp->ile.in_label, 0, 0, 1);
2464 addattr_l(&req.n, sizeof req, RTA_DST, &lse, sizeof(mpls_lse_t));
2465
2466 /* Fill nexthops (paths) based on single-path or multipath. The paths
2467 * chosen depend on the operation.
2468 */
2469 if (nexthop_num == 1 || multipath_num == 1) {
8b1450b9 2470 routedesc = "single-path";
d62a17ae 2471 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2472
2473 nexthop_num = 0;
2474 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2475 nexthop = nhlfe->nexthop;
2476 if (!nexthop)
2477 continue;
2478
2479 if ((cmd == RTM_NEWROUTE
2480 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2481 && CHECK_FLAG(nexthop->flags,
2482 NEXTHOP_FLAG_ACTIVE)))
2483 || (cmd == RTM_DELROUTE
2484 && (CHECK_FLAG(nhlfe->flags,
2485 NHLFE_FLAG_INSTALLED)
2486 && CHECK_FLAG(nexthop->flags,
2487 NEXTHOP_FLAG_FIB)))) {
2488 /* Add the gateway */
2489 _netlink_mpls_build_singlepath(routedesc, nhlfe,
2490 &req.n, &req.r,
2491 sizeof req, cmd);
d62a17ae 2492 nexthop_num++;
2493 break;
2494 }
2495 }
2496 } else /* Multipath case */
2497 {
2498 char buf[NL_PKT_BUF_SIZE];
2499 struct rtattr *rta = (void *)buf;
2500 struct rtnexthop *rtnh;
2501 union g_addr *src1 = NULL;
2502
2503 rta->rta_type = RTA_MULTIPATH;
2504 rta->rta_len = RTA_LENGTH(0);
2505 rtnh = RTA_DATA(rta);
2506
8b1450b9 2507 routedesc = "multipath";
d62a17ae 2508 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2509
2510 nexthop_num = 0;
2511 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2512 nexthop = nhlfe->nexthop;
2513 if (!nexthop)
2514 continue;
2515
2516 if (nexthop_num >= multipath_num)
2517 break;
2518
2519 if ((cmd == RTM_NEWROUTE
2520 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2521 && CHECK_FLAG(nexthop->flags,
2522 NEXTHOP_FLAG_ACTIVE)))
2523 || (cmd == RTM_DELROUTE
2524 && (CHECK_FLAG(nhlfe->flags,
2525 NHLFE_FLAG_INSTALLED)
2526 && CHECK_FLAG(nexthop->flags,
2527 NEXTHOP_FLAG_FIB)))) {
2528 nexthop_num++;
2529
2530 /* Build the multipath */
2531 _netlink_mpls_build_multipath(routedesc, nhlfe,
2532 rta, rtnh, &req.r,
2533 &src1);
2534 rtnh = RTNH_NEXT(rtnh);
d62a17ae 2535 }
2536 }
2537
2538 /* Add the multipath */
2539 if (rta->rta_len > RTA_LENGTH(0))
2540 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
2541 RTA_DATA(rta), RTA_PAYLOAD(rta));
2542 }
2543
2544 /* Talk to netlink socket. */
2545 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2546 0);
40c7bdb0 2547}
ddfeb486 2548#endif /* HAVE_NETLINK */