]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
Merge pull request #4877 from mjstapp/dplane_neighs
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 #include <net/if_arp.h>
26 #include <linux/lwtunnel.h>
27 #include <linux/mpls_iptunnel.h>
28 #include <linux/neighbour.h>
29 #include <linux/rtnetlink.h>
30
31 /* Hack for GNU libc version 2. */
32 #ifndef MSG_TRUNC
33 #define MSG_TRUNC 0x20
34 #endif /* MSG_TRUNC */
35
36 #include "linklist.h"
37 #include "if.h"
38 #include "log.h"
39 #include "prefix.h"
40 #include "connected.h"
41 #include "table.h"
42 #include "memory.h"
43 #include "zebra_memory.h"
44 #include "rib.h"
45 #include "thread.h"
46 #include "privs.h"
47 #include "nexthop.h"
48 #include "vrf.h"
49 #include "vty.h"
50 #include "mpls.h"
51 #include "vxlan.h"
52
53 #include "zebra/zapi_msg.h"
54 #include "zebra/zebra_ns.h"
55 #include "zebra/zebra_vrf.h"
56 #include "zebra/rt.h"
57 #include "zebra/redistribute.h"
58 #include "zebra/interface.h"
59 #include "zebra/debug.h"
60 #include "zebra/rtadv.h"
61 #include "zebra/zebra_ptm.h"
62 #include "zebra/zebra_mpls.h"
63 #include "zebra/kernel_netlink.h"
64 #include "zebra/rt_netlink.h"
65 #include "zebra/zebra_mroute.h"
66 #include "zebra/zebra_vxlan.h"
67 #include "zebra/zebra_errors.h"
68
69 #ifndef AF_MPLS
70 #define AF_MPLS 28
71 #endif
72
/*
 * NOTE(review): filter_vlan is not referenced in the part of the file
 * reviewed here; presumably it is consumed further down - confirm.
 */
static vlanid_t filter_vlan = 0;

/*
 * Scratch layout used when building an RTA_VIA attribute: the gateway
 * helpers in this file pass &family with a length of (address bytes + 2),
 * so the 16-bit family is sent immediately followed by the gateway address.
 */
struct gw_family_t {
	/* Not filled in or transmitted by the helpers visible in this file. */
	uint16_t filler;
	/* Address family of the gateway; first bytes of the RTA_VIA payload. */
	uint16_t family;
	/* Gateway address, copied from the nexthop's gate. */
	union g_addr gate;
};

/* Textual form of the IPv4 gateway address used for "5549" routes. */
char ipv4_ll_buf[16] = "169.254.0.1";
/* Binary form of ipv4_ll_buf, filled in by rt_netlink_init(). */
struct in_addr ipv4_ll;
83
84 /*
85 * The ipv4_ll data structure is used for all 5549
86 * additions to the kernel. Let's figure out the
87 * correct value one time instead for every
88 * install/remove of a 5549 type route
89 */
90 void rt_netlink_init(void)
91 {
92 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
93 }
94
95 /*
96 * Mapping from dataplane neighbor flags to netlink flags
97 */
98 static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
99 {
100 uint8_t flags = 0;
101
102 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
103 flags |= NTF_EXT_LEARNED;
104 if (dplane_flags & DPLANE_NTF_ROUTER)
105 flags |= NTF_ROUTER;
106
107 return flags;
108 }
109
110 /*
111 * Mapping from dataplane neighbor state to netlink state
112 */
113 static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
114 {
115 uint16_t state = 0;
116
117 if (dplane_state & DPLANE_NUD_REACHABLE)
118 state |= NUD_REACHABLE;
119 if (dplane_state & DPLANE_NUD_STALE)
120 state |= NUD_STALE;
121 if (dplane_state & DPLANE_NUD_NOARP)
122 state |= NUD_NOARP;
123 if (dplane_state & DPLANE_NUD_PROBE)
124 state |= NUD_PROBE;
125
126 return state;
127 }
128
129
130 static inline int is_selfroute(int proto)
131 {
132 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
133 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
134 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
135 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
136 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
137 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
138 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)) {
139 return 1;
140 }
141
142 return 0;
143 }
144
145 static inline int zebra2proto(int proto)
146 {
147 switch (proto) {
148 case ZEBRA_ROUTE_BABEL:
149 proto = RTPROT_BABEL;
150 break;
151 case ZEBRA_ROUTE_BGP:
152 proto = RTPROT_BGP;
153 break;
154 case ZEBRA_ROUTE_OSPF:
155 case ZEBRA_ROUTE_OSPF6:
156 proto = RTPROT_OSPF;
157 break;
158 case ZEBRA_ROUTE_STATIC:
159 proto = RTPROT_ZSTATIC;
160 break;
161 case ZEBRA_ROUTE_ISIS:
162 proto = RTPROT_ISIS;
163 break;
164 case ZEBRA_ROUTE_RIP:
165 proto = RTPROT_RIP;
166 break;
167 case ZEBRA_ROUTE_RIPNG:
168 proto = RTPROT_RIPNG;
169 break;
170 case ZEBRA_ROUTE_NHRP:
171 proto = RTPROT_NHRP;
172 break;
173 case ZEBRA_ROUTE_EIGRP:
174 proto = RTPROT_EIGRP;
175 break;
176 case ZEBRA_ROUTE_LDP:
177 proto = RTPROT_LDP;
178 break;
179 case ZEBRA_ROUTE_SHARP:
180 proto = RTPROT_SHARP;
181 break;
182 case ZEBRA_ROUTE_PBR:
183 proto = RTPROT_PBR;
184 break;
185 case ZEBRA_ROUTE_OPENFABRIC:
186 proto = RTPROT_OPENFABRIC;
187 break;
188 case ZEBRA_ROUTE_TABLE:
189 proto = RTPROT_ZEBRA;
190 break;
191 default:
192 /*
193 * When a user adds a new protocol this will show up
194 * to let them know to do something about it. This
195 * is intentionally a warn because we should see
196 * this as part of development of a new protocol
197 */
198 zlog_debug(
199 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
200 __PRETTY_FUNCTION__, proto);
201 proto = RTPROT_ZEBRA;
202 break;
203 }
204
205 return proto;
206 }
207
208 static inline int proto2zebra(int proto, int family)
209 {
210 switch (proto) {
211 case RTPROT_BABEL:
212 proto = ZEBRA_ROUTE_BABEL;
213 break;
214 case RTPROT_BGP:
215 proto = ZEBRA_ROUTE_BGP;
216 break;
217 case RTPROT_OSPF:
218 proto = (family == AFI_IP) ? ZEBRA_ROUTE_OSPF
219 : ZEBRA_ROUTE_OSPF6;
220 break;
221 case RTPROT_ISIS:
222 proto = ZEBRA_ROUTE_ISIS;
223 break;
224 case RTPROT_RIP:
225 proto = ZEBRA_ROUTE_RIP;
226 break;
227 case RTPROT_RIPNG:
228 proto = ZEBRA_ROUTE_RIPNG;
229 break;
230 case RTPROT_NHRP:
231 proto = ZEBRA_ROUTE_NHRP;
232 break;
233 case RTPROT_EIGRP:
234 proto = ZEBRA_ROUTE_EIGRP;
235 break;
236 case RTPROT_LDP:
237 proto = ZEBRA_ROUTE_LDP;
238 break;
239 case RTPROT_STATIC:
240 case RTPROT_ZSTATIC:
241 proto = ZEBRA_ROUTE_STATIC;
242 break;
243 case RTPROT_SHARP:
244 proto = ZEBRA_ROUTE_SHARP;
245 break;
246 case RTPROT_PBR:
247 proto = ZEBRA_ROUTE_PBR;
248 break;
249 case RTPROT_OPENFABRIC:
250 proto = ZEBRA_ROUTE_OPENFABRIC;
251 break;
252 default:
253 /*
254 * When a user adds a new protocol this will show up
255 * to let them know to do something about it. This
256 * is intentionally a warn because we should see
257 * this as part of development of a new protocol
258 */
259 zlog_debug(
260 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
261 __PRETTY_FUNCTION__, proto);
262 proto = ZEBRA_ROUTE_KERNEL;
263 break;
264 }
265 return proto;
266 }
267
268 /*
269 Pending: create an efficient table_id (in a tree/hash) based lookup)
270 */
271 static vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
272 {
273 struct vrf *vrf;
274 struct zebra_vrf *zvrf;
275
276 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
277 zvrf = vrf->info;
278 if (zvrf == NULL)
279 continue;
280 /* case vrf with netns : match the netnsid */
281 if (vrf_is_backend_netns()) {
282 if (ns_id == zvrf_id(zvrf))
283 return zvrf_id(zvrf);
284 } else {
285 /* VRF is VRF_BACKEND_VRF_LITE */
286 if (zvrf->table_id != table_id)
287 continue;
288 return zvrf_id(zvrf);
289 }
290 }
291
292 return VRF_DEFAULT;
293 }
294
295 /**
296 * @parse_encap_mpls() - Parses encapsulated mpls attributes
297 * @tb: Pointer to rtattr to look for nested items in.
298 * @labels: Pointer to store labels in.
299 *
300 * Return: Number of mpls labels found.
301 */
302 static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
303 {
304 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
305 mpls_lse_t *lses = NULL;
306 int num_labels = 0;
307 uint32_t ttl = 0;
308 uint32_t bos = 0;
309 uint32_t exp = 0;
310 mpls_label_t label = 0;
311
312 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
313 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
314 while (!bos && num_labels < MPLS_MAX_LABELS) {
315 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
316 labels[num_labels++] = label;
317 }
318
319 return num_labels;
320 }
321
322 /* Looking up routing table by netlink interface. */
323 static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
324 int startup)
325 {
326 int len;
327 struct rtmsg *rtm;
328 struct rtattr *tb[RTA_MAX + 1];
329 uint8_t flags = 0;
330 struct prefix p;
331 struct prefix_ipv6 src_p = {};
332 vrf_id_t vrf_id;
333
334 char anyaddr[16] = {0};
335
336 int proto = ZEBRA_ROUTE_KERNEL;
337 int index = 0;
338 int table;
339 int metric = 0;
340 uint32_t mtu = 0;
341 uint8_t distance = 0;
342 route_tag_t tag = 0;
343
344 void *dest = NULL;
345 void *gate = NULL;
346 void *prefsrc = NULL; /* IPv4 preferred source host address */
347 void *src = NULL; /* IPv6 srcdest source prefix */
348 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
349
350 /* MPLS labels */
351 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
352 int num_labels = 0;
353
354 rtm = NLMSG_DATA(h);
355
356 if (startup && h->nlmsg_type != RTM_NEWROUTE)
357 return 0;
358 switch (rtm->rtm_type) {
359 case RTN_UNICAST:
360 break;
361 case RTN_BLACKHOLE:
362 bh_type = BLACKHOLE_NULL;
363 break;
364 case RTN_UNREACHABLE:
365 bh_type = BLACKHOLE_REJECT;
366 break;
367 case RTN_PROHIBIT:
368 bh_type = BLACKHOLE_ADMINPROHIB;
369 break;
370 default:
371 if (IS_ZEBRA_DEBUG_KERNEL)
372 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
373 nl_rttype_to_str(rtm->rtm_type),
374 rtm->rtm_type);
375 return 0;
376 }
377
378 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
379 if (len < 0) {
380 zlog_err("%s: Message received from netlink is of a broken size %d %zu",
381 __PRETTY_FUNCTION__, h->nlmsg_len,
382 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
383 return -1;
384 }
385
386 memset(tb, 0, sizeof tb);
387 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
388
389 if (rtm->rtm_flags & RTM_F_CLONED)
390 return 0;
391 if (rtm->rtm_protocol == RTPROT_REDIRECT)
392 return 0;
393 if (rtm->rtm_protocol == RTPROT_KERNEL)
394 return 0;
395
396 if (!startup && is_selfroute(rtm->rtm_protocol)
397 && h->nlmsg_type == RTM_NEWROUTE) {
398 if (IS_ZEBRA_DEBUG_KERNEL)
399 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
400 rtm->rtm_protocol);
401 return 0;
402 }
403
404 /* We don't care about change notifications for the MPLS table. */
405 /* TODO: Revisit this. */
406 if (rtm->rtm_family == AF_MPLS)
407 return 0;
408
409 /* Table corresponding to route. */
410 if (tb[RTA_TABLE])
411 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
412 else
413 table = rtm->rtm_table;
414
415 /* Map to VRF */
416 vrf_id = vrf_lookup_by_table(table, ns_id);
417 if (vrf_id == VRF_DEFAULT) {
418 if (!is_zebra_valid_kernel_table(table)
419 && !is_zebra_main_routing_table(table))
420 return 0;
421 }
422
423 /* Route which inserted by Zebra. */
424 if (is_selfroute(rtm->rtm_protocol)) {
425 flags |= ZEBRA_FLAG_SELFROUTE;
426 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
427 }
428 if (tb[RTA_OIF])
429 index = *(int *)RTA_DATA(tb[RTA_OIF]);
430
431 if (tb[RTA_DST])
432 dest = RTA_DATA(tb[RTA_DST]);
433 else
434 dest = anyaddr;
435
436 if (tb[RTA_SRC])
437 src = RTA_DATA(tb[RTA_SRC]);
438 else
439 src = anyaddr;
440
441 if (tb[RTA_PREFSRC])
442 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
443
444 if (tb[RTA_GATEWAY])
445 gate = RTA_DATA(tb[RTA_GATEWAY]);
446
447 if (tb[RTA_PRIORITY])
448 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
449
450 #if defined(SUPPORT_REALMS)
451 if (tb[RTA_FLOW])
452 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
453 #endif
454
455 if (tb[RTA_METRICS]) {
456 struct rtattr *mxrta[RTAX_MAX + 1];
457
458 memset(mxrta, 0, sizeof mxrta);
459 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
460 RTA_PAYLOAD(tb[RTA_METRICS]));
461
462 if (mxrta[RTAX_MTU])
463 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
464 }
465
466 if (rtm->rtm_family == AF_INET) {
467 p.family = AF_INET;
468 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
469 zlog_err(
470 "Invalid destination prefix length: %u received from kernel route change",
471 rtm->rtm_dst_len);
472 return -1;
473 }
474 memcpy(&p.u.prefix4, dest, 4);
475 p.prefixlen = rtm->rtm_dst_len;
476
477 if (rtm->rtm_src_len != 0) {
478 char buf[PREFIX_STRLEN];
479 flog_warn(
480 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
481 "unsupported IPv4 sourcedest route (dest %s vrf %u)",
482 prefix2str(&p, buf, sizeof(buf)), vrf_id);
483 return 0;
484 }
485
486 /* Force debug below to not display anything for source */
487 src_p.prefixlen = 0;
488 } else if (rtm->rtm_family == AF_INET6) {
489 p.family = AF_INET6;
490 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
491 zlog_err(
492 "Invalid destination prefix length: %u received from kernel route change",
493 rtm->rtm_dst_len);
494 return -1;
495 }
496 memcpy(&p.u.prefix6, dest, 16);
497 p.prefixlen = rtm->rtm_dst_len;
498
499 src_p.family = AF_INET6;
500 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
501 zlog_err(
502 "Invalid source prefix length: %u received from kernel route change",
503 rtm->rtm_src_len);
504 return -1;
505 }
506 memcpy(&src_p.prefix, src, 16);
507 src_p.prefixlen = rtm->rtm_src_len;
508 }
509
510 /*
511 * For ZEBRA_ROUTE_KERNEL types:
512 *
513 * The metric/priority of the route received from the kernel
514 * is a 32 bit number. We are going to interpret the high
515 * order byte as the Admin Distance and the low order 3 bytes
516 * as the metric.
517 *
518 * This will allow us to do two things:
519 * 1) Allow the creation of kernel routes that can be
520 * overridden by zebra.
521 * 2) Allow the old behavior for 'most' kernel route types
522 * if a user enters 'ip route ...' v4 routes get a metric
523 * of 0 and v6 routes get a metric of 1024. Both of these
524 * values will end up with a admin distance of 0, which
525 * will cause them to win for the purposes of zebra.
526 */
527 if (proto == ZEBRA_ROUTE_KERNEL) {
528 distance = (metric >> 24) & 0xFF;
529 metric = (metric & 0x00FFFFFF);
530 }
531
532 if (IS_ZEBRA_DEBUG_KERNEL) {
533 char buf[PREFIX_STRLEN];
534 char buf2[PREFIX_STRLEN];
535 zlog_debug("%s %s%s%s vrf %u(%u) metric: %d Admin Distance: %d",
536 nl_msg_type_to_str(h->nlmsg_type),
537 prefix2str(&p, buf, sizeof(buf)),
538 src_p.prefixlen ? " from " : "",
539 src_p.prefixlen
540 ? prefix2str(&src_p, buf2, sizeof(buf2))
541 : "",
542 vrf_id, table, metric, distance);
543 }
544
545 afi_t afi = AFI_IP;
546 if (rtm->rtm_family == AF_INET6)
547 afi = AFI_IP6;
548
549 if (h->nlmsg_type == RTM_NEWROUTE) {
550 struct interface *ifp;
551 vrf_id_t nh_vrf_id = vrf_id;
552
553 if (!tb[RTA_MULTIPATH]) {
554 struct nexthop nh;
555 size_t sz = (afi == AFI_IP) ? 4 : 16;
556
557 memset(&nh, 0, sizeof(nh));
558
559 if (bh_type == BLACKHOLE_UNSPEC) {
560 if (index && !gate)
561 nh.type = NEXTHOP_TYPE_IFINDEX;
562 else if (index && gate)
563 nh.type =
564 (afi == AFI_IP)
565 ? NEXTHOP_TYPE_IPV4_IFINDEX
566 : NEXTHOP_TYPE_IPV6_IFINDEX;
567 else if (!index && gate)
568 nh.type = (afi == AFI_IP)
569 ? NEXTHOP_TYPE_IPV4
570 : NEXTHOP_TYPE_IPV6;
571 else {
572 nh.type = NEXTHOP_TYPE_BLACKHOLE;
573 nh.bh_type = bh_type;
574 }
575 } else {
576 nh.type = NEXTHOP_TYPE_BLACKHOLE;
577 nh.bh_type = bh_type;
578 }
579 nh.ifindex = index;
580 if (prefsrc)
581 memcpy(&nh.src, prefsrc, sz);
582 if (gate)
583 memcpy(&nh.gate, gate, sz);
584
585 if (index) {
586 ifp = if_lookup_by_index_per_ns(
587 zebra_ns_lookup(ns_id),
588 index);
589 if (ifp)
590 nh_vrf_id = ifp->vrf_id;
591 }
592 nh.vrf_id = nh_vrf_id;
593
594 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
595 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
596 == LWTUNNEL_ENCAP_MPLS) {
597 num_labels =
598 parse_encap_mpls(tb[RTA_ENCAP], labels);
599 }
600
601 if (rtm->rtm_flags & RTNH_F_ONLINK)
602 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
603
604 if (num_labels)
605 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC,
606 num_labels, labels);
607
608 rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
609 &src_p, &nh, table, metric, mtu, distance, tag);
610 } else {
611 /* This is a multipath route */
612
613 struct route_entry *re;
614 struct rtnexthop *rtnh =
615 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
616
617 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
618
619 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
620 re->type = proto;
621 re->distance = distance;
622 re->flags = flags;
623 re->metric = metric;
624 re->mtu = mtu;
625 re->vrf_id = vrf_id;
626 re->table = table;
627 re->nexthop_num = 0;
628 re->uptime = monotime(NULL);
629 re->tag = tag;
630
631 for (;;) {
632 struct nexthop *nh = NULL;
633
634 if (len < (int)sizeof(*rtnh)
635 || rtnh->rtnh_len > len)
636 break;
637
638 index = rtnh->rtnh_ifindex;
639 if (index) {
640 /*
641 * Yes we are looking this up
642 * for every nexthop and just
643 * using the last one looked
644 * up right now
645 */
646 ifp = if_lookup_by_index_per_ns(
647 zebra_ns_lookup(ns_id),
648 index);
649 if (ifp)
650 nh_vrf_id = ifp->vrf_id;
651 else {
652 flog_warn(
653 EC_ZEBRA_UNKNOWN_INTERFACE,
654 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
655 __PRETTY_FUNCTION__,
656 index);
657 nh_vrf_id = VRF_DEFAULT;
658 }
659 } else
660 nh_vrf_id = vrf_id;
661
662 gate = 0;
663 if (rtnh->rtnh_len > sizeof(*rtnh)) {
664 memset(tb, 0, sizeof(tb));
665 netlink_parse_rtattr(
666 tb, RTA_MAX, RTNH_DATA(rtnh),
667 rtnh->rtnh_len - sizeof(*rtnh));
668 if (tb[RTA_GATEWAY])
669 gate = RTA_DATA(
670 tb[RTA_GATEWAY]);
671 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
672 && *(uint16_t *)RTA_DATA(
673 tb[RTA_ENCAP_TYPE])
674 == LWTUNNEL_ENCAP_MPLS) {
675 num_labels = parse_encap_mpls(
676 tb[RTA_ENCAP], labels);
677 }
678 }
679
680 if (gate) {
681 if (rtm->rtm_family == AF_INET) {
682 if (index)
683 nh = route_entry_nexthop_ipv4_ifindex_add(
684 re, gate,
685 prefsrc, index,
686 nh_vrf_id);
687 else
688 nh = route_entry_nexthop_ipv4_add(
689 re, gate,
690 prefsrc,
691 nh_vrf_id);
692 } else if (rtm->rtm_family
693 == AF_INET6) {
694 if (index)
695 nh = route_entry_nexthop_ipv6_ifindex_add(
696 re, gate, index,
697 nh_vrf_id);
698 else
699 nh = route_entry_nexthop_ipv6_add(
700 re, gate,
701 nh_vrf_id);
702 }
703 } else
704 nh = route_entry_nexthop_ifindex_add(
705 re, index, nh_vrf_id);
706
707 if (nh && num_labels)
708 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
709 num_labels, labels);
710
711 if (nh && (rtnh->rtnh_flags & RTNH_F_ONLINK))
712 SET_FLAG(nh->flags,
713 NEXTHOP_FLAG_ONLINK);
714
715 if (rtnh->rtnh_len == 0)
716 break;
717
718 len -= NLMSG_ALIGN(rtnh->rtnh_len);
719 rtnh = RTNH_NEXT(rtnh);
720 }
721
722 zserv_nexthop_num_warn(__func__,
723 (const struct prefix *)&p,
724 re->nexthop_num);
725 if (re->nexthop_num == 0)
726 XFREE(MTYPE_RE, re);
727 else
728 rib_add_multipath(afi, SAFI_UNICAST, &p,
729 &src_p, re);
730 }
731 } else {
732 if (!tb[RTA_MULTIPATH]) {
733 struct nexthop nh;
734 size_t sz = (afi == AFI_IP) ? 4 : 16;
735
736 memset(&nh, 0, sizeof(nh));
737 if (bh_type == BLACKHOLE_UNSPEC) {
738 if (index && !gate)
739 nh.type = NEXTHOP_TYPE_IFINDEX;
740 else if (index && gate)
741 nh.type =
742 (afi == AFI_IP)
743 ? NEXTHOP_TYPE_IPV4_IFINDEX
744 : NEXTHOP_TYPE_IPV6_IFINDEX;
745 else if (!index && gate)
746 nh.type = (afi == AFI_IP)
747 ? NEXTHOP_TYPE_IPV4
748 : NEXTHOP_TYPE_IPV6;
749 else {
750 nh.type = NEXTHOP_TYPE_BLACKHOLE;
751 nh.bh_type = BLACKHOLE_UNSPEC;
752 }
753 } else {
754 nh.type = NEXTHOP_TYPE_BLACKHOLE;
755 nh.bh_type = bh_type;
756 }
757 nh.ifindex = index;
758 if (gate)
759 memcpy(&nh.gate, gate, sz);
760 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
761 &p, &src_p, &nh, table, metric, distance,
762 true);
763 } else {
764 /* XXX: need to compare the entire list of nexthops
765 * here for NLM_F_APPEND stupidity */
766 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
767 &p, &src_p, NULL, table, metric, distance,
768 true);
769 }
770 }
771
772 return 0;
773 }
774
775 static struct mcast_route_data *mroute = NULL;
776
777 static int netlink_route_change_read_multicast(struct nlmsghdr *h,
778 ns_id_t ns_id, int startup)
779 {
780 int len;
781 struct rtmsg *rtm;
782 struct rtattr *tb[RTA_MAX + 1];
783 struct mcast_route_data *m;
784 struct mcast_route_data mr;
785 int iif = 0;
786 int count;
787 int oif[256];
788 int oif_count = 0;
789 char sbuf[40];
790 char gbuf[40];
791 char oif_list[256] = "\0";
792 vrf_id_t vrf;
793 int table;
794
795 if (mroute)
796 m = mroute;
797 else {
798 memset(&mr, 0, sizeof(mr));
799 m = &mr;
800 }
801
802 rtm = NLMSG_DATA(h);
803
804 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
805
806 memset(tb, 0, sizeof tb);
807 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
808
809 if (tb[RTA_TABLE])
810 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
811 else
812 table = rtm->rtm_table;
813
814 vrf = vrf_lookup_by_table(table, ns_id);
815
816 if (tb[RTA_IIF])
817 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
818
819 if (tb[RTA_SRC])
820 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
821
822 if (tb[RTA_DST])
823 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
824
825 if (tb[RTA_EXPIRES])
826 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
827
828 if (tb[RTA_MULTIPATH]) {
829 struct rtnexthop *rtnh =
830 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
831
832 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
833 for (;;) {
834 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
835 break;
836
837 oif[oif_count] = rtnh->rtnh_ifindex;
838 oif_count++;
839
840 if (rtnh->rtnh_len == 0)
841 break;
842
843 len -= NLMSG_ALIGN(rtnh->rtnh_len);
844 rtnh = RTNH_NEXT(rtnh);
845 }
846 }
847
848 if (IS_ZEBRA_DEBUG_KERNEL) {
849 struct interface *ifp = NULL;
850 struct zebra_vrf *zvrf = NULL;
851
852 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
853 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
854 for (count = 0; count < oif_count; count++) {
855 ifp = if_lookup_by_index(oif[count], vrf);
856 char temp[256];
857
858 sprintf(temp, "%s(%d) ", ifp ? ifp->name : "Unknown",
859 oif[count]);
860 strlcat(oif_list, temp, sizeof(oif_list));
861 }
862 zvrf = zebra_vrf_lookup_by_id(vrf);
863 ifp = if_lookup_by_index(iif, vrf);
864 zlog_debug(
865 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s(%d) OIF: %s jiffies: %lld",
866 (zvrf ? zvrf->vrf->name : "Unknown"), vrf,
867 nl_msg_type_to_str(h->nlmsg_type), sbuf, gbuf,
868 ifp ? ifp->name : "Unknown", iif, oif_list,
869 m->lastused);
870 }
871 return 0;
872 }
873
874 int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
875 {
876 int len;
877 struct rtmsg *rtm;
878
879 rtm = NLMSG_DATA(h);
880
881 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
882 /* If this is not route add/delete message print warning. */
883 zlog_debug("Kernel message: %s NS %u",
884 nl_msg_type_to_str(h->nlmsg_type), ns_id);
885 return 0;
886 }
887
888 if (!(rtm->rtm_family == AF_INET ||
889 rtm->rtm_family == AF_INET6 ||
890 rtm->rtm_family == RTNL_FAMILY_IPMR )) {
891 flog_warn(
892 EC_ZEBRA_UNKNOWN_FAMILY,
893 "Invalid address family: %u received from kernel route change: %s",
894 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
895 return 0;
896 }
897
898 /* Connected route. */
899 if (IS_ZEBRA_DEBUG_KERNEL)
900 zlog_debug("%s %s %s proto %s NS %u",
901 nl_msg_type_to_str(h->nlmsg_type),
902 nl_family_to_str(rtm->rtm_family),
903 nl_rttype_to_str(rtm->rtm_type),
904 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
905
906
907 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
908 if (len < 0) {
909 zlog_err("%s: Message received from netlink is of a broken size: %d %zu",
910 __PRETTY_FUNCTION__,
911 h->nlmsg_len,
912 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
913 return -1;
914 }
915
916 if (rtm->rtm_type == RTN_MULTICAST)
917 netlink_route_change_read_multicast(h, ns_id, startup);
918 else
919 netlink_route_change_read_unicast(h, ns_id, startup);
920 return 0;
921 }
922
923 /* Request for specific route information from the kernel */
924 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
925 {
926 struct {
927 struct nlmsghdr n;
928 struct rtmsg rtm;
929 } req;
930
931 /* Form the request, specifying filter (rtattr) if needed. */
932 memset(&req, 0, sizeof(req));
933 req.n.nlmsg_type = type;
934 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
935 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
936 req.rtm.rtm_family = family;
937
938 return netlink_request(&zns->netlink_cmd, &req.n);
939 }
940
941 /* Routing table read function using netlink interface. Only called
942 bootstrap time. */
943 int netlink_route_read(struct zebra_ns *zns)
944 {
945 int ret;
946 struct zebra_dplane_info dp_info;
947
948 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
949
950 /* Get IPv4 routing table. */
951 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
952 if (ret < 0)
953 return ret;
954 ret = netlink_parse_info(netlink_route_change_read_unicast,
955 &zns->netlink_cmd, &dp_info, 0, 1);
956 if (ret < 0)
957 return ret;
958
959 /* Get IPv6 routing table. */
960 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
961 if (ret < 0)
962 return ret;
963 ret = netlink_parse_info(netlink_route_change_read_unicast,
964 &zns->netlink_cmd, &dp_info, 0, 1);
965 if (ret < 0)
966 return ret;
967
968 return 0;
969 }
970
971 static void _netlink_route_nl_add_gateway_info(uint8_t route_family,
972 uint8_t gw_family,
973 struct nlmsghdr *nlmsg,
974 size_t req_size, int bytelen,
975 const struct nexthop *nexthop)
976 {
977 if (route_family == AF_MPLS) {
978 struct gw_family_t gw_fam;
979
980 gw_fam.family = gw_family;
981 if (gw_family == AF_INET)
982 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
983 else
984 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
985 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
986 bytelen + 2);
987 } else {
988 if (gw_family == AF_INET)
989 addattr_l(nlmsg, req_size, RTA_GATEWAY,
990 &nexthop->gate.ipv4, bytelen);
991 else
992 addattr_l(nlmsg, req_size, RTA_GATEWAY,
993 &nexthop->gate.ipv6, bytelen);
994 }
995 }
996
997 static void _netlink_route_rta_add_gateway_info(uint8_t route_family,
998 uint8_t gw_family,
999 struct rtattr *rta,
1000 struct rtnexthop *rtnh,
1001 size_t req_size, int bytelen,
1002 const struct nexthop *nexthop)
1003 {
1004 if (route_family == AF_MPLS) {
1005 struct gw_family_t gw_fam;
1006
1007 gw_fam.family = gw_family;
1008 if (gw_family == AF_INET)
1009 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1010 else
1011 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1012 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
1013 bytelen + 2);
1014 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
1015 } else {
1016 if (gw_family == AF_INET)
1017 rta_addattr_l(rta, req_size, RTA_GATEWAY,
1018 &nexthop->gate.ipv4, bytelen);
1019 else
1020 rta_addattr_l(rta, req_size, RTA_GATEWAY,
1021 &nexthop->gate.ipv6, bytelen);
1022 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1023 }
1024 }
1025
1026 /* This function takes a nexthop as argument and adds
1027 * the appropriate netlink attributes to an existing
1028 * netlink message.
1029 *
1030 * @param routedesc: Human readable description of route type
1031 * (direct/recursive, single-/multipath)
1032 * @param bytelen: Length of addresses in bytes.
1033 * @param nexthop: Nexthop information
1034 * @param nlmsg: nlmsghdr structure to fill in.
1035 * @param req_size: The size allocated for the message.
1036 */
1037 static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
1038 const struct nexthop *nexthop,
1039 struct nlmsghdr *nlmsg,
1040 struct rtmsg *rtmsg,
1041 size_t req_size, int cmd)
1042 {
1043 struct mpls_label_stack *nh_label;
1044 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1045 int num_labels = 0;
1046 char label_buf[256];
1047
1048 /*
1049 * label_buf is *only* currently used within debugging.
1050 * As such when we assign it we are guarding it inside
1051 * a debug test. If you want to change this make sure
1052 * you fix this assumption
1053 */
1054 label_buf[0] = '\0';
1055
1056 assert(nexthop);
1057 char label_buf1[20];
1058
1059 nh_label = nexthop->nh_label;
1060
1061 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1062 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1063 continue;
1064
1065 if (IS_ZEBRA_DEBUG_KERNEL) {
1066 if (!num_labels)
1067 sprintf(label_buf, "label %u",
1068 nh_label->label[i]);
1069 else {
1070 sprintf(label_buf1, "/%u", nh_label->label[i]);
1071 strlcat(label_buf, label_buf1,
1072 sizeof(label_buf));
1073 }
1074 }
1075
1076 out_lse[num_labels] =
1077 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1078 num_labels++;
1079 }
1080
1081 if (num_labels) {
1082 /* Set the BoS bit */
1083 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1084
1085 if (rtmsg->rtm_family == AF_MPLS)
1086 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
1087 num_labels * sizeof(mpls_lse_t));
1088 else {
1089 struct rtattr *nest;
1090 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
1091
1092 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE, &encap,
1093 sizeof(uint16_t));
1094 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
1095 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST, &out_lse,
1096 num_labels * sizeof(mpls_lse_t));
1097 addattr_nest_end(nlmsg, nest);
1098 }
1099 }
1100
1101 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1102 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1103
1104 if (rtmsg->rtm_family == AF_INET
1105 && (nexthop->type == NEXTHOP_TYPE_IPV6
1106 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1107 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1108 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
1109 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1110
1111 if (nexthop->rmap_src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
1112 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1113 &nexthop->rmap_src.ipv4, bytelen);
1114 else if (nexthop->src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
1115 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1116 &nexthop->src.ipv4, bytelen);
1117
1118 if (IS_ZEBRA_DEBUG_KERNEL)
1119 zlog_debug(
1120 " 5549: _netlink_route_build_singlepath() (%s): "
1121 "nexthop via %s %s if %u(%u)",
1122 routedesc, ipv4_ll_buf, label_buf,
1123 nexthop->ifindex, nexthop->vrf_id);
1124 return;
1125 }
1126
1127 if (nexthop->type == NEXTHOP_TYPE_IPV4
1128 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1129 /* Send deletes to the kernel without specifying the next-hop */
1130 if (cmd != RTM_DELROUTE)
1131 _netlink_route_nl_add_gateway_info(
1132 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1133 bytelen, nexthop);
1134
1135 if (cmd == RTM_NEWROUTE) {
1136 if (nexthop->rmap_src.ipv4.s_addr)
1137 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1138 &nexthop->rmap_src.ipv4, bytelen);
1139 else if (nexthop->src.ipv4.s_addr)
1140 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1141 &nexthop->src.ipv4, bytelen);
1142 }
1143
1144 if (IS_ZEBRA_DEBUG_KERNEL)
1145 zlog_debug(
1146 "netlink_route_multipath() (%s): "
1147 "nexthop via %s %s if %u(%u)",
1148 routedesc, inet_ntoa(nexthop->gate.ipv4),
1149 label_buf, nexthop->ifindex, nexthop->vrf_id);
1150 }
1151
1152 if (nexthop->type == NEXTHOP_TYPE_IPV6
1153 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1154 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1155 nlmsg, req_size, bytelen,
1156 nexthop);
1157
1158 if (cmd == RTM_NEWROUTE) {
1159 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1160 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1161 &nexthop->rmap_src.ipv6, bytelen);
1162 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1163 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1164 &nexthop->src.ipv6, bytelen);
1165 }
1166
1167 if (IS_ZEBRA_DEBUG_KERNEL)
1168 zlog_debug(
1169 "netlink_route_multipath() (%s): "
1170 "nexthop via %s %s if %u(%u)",
1171 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1172 label_buf, nexthop->ifindex, nexthop->vrf_id);
1173 }
1174
1175 /*
1176 * We have the ifindex so we should always send it
1177 * This is especially useful if we are doing route
1178 * leaking.
1179 */
1180 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1181 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1182
1183 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1184 if (cmd == RTM_NEWROUTE) {
1185 if (nexthop->rmap_src.ipv4.s_addr)
1186 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1187 &nexthop->rmap_src.ipv4, bytelen);
1188 else if (nexthop->src.ipv4.s_addr)
1189 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1190 &nexthop->src.ipv4, bytelen);
1191 }
1192
1193 if (IS_ZEBRA_DEBUG_KERNEL)
1194 zlog_debug(
1195 "netlink_route_multipath() (%s): "
1196 "nexthop via if %u(%u)",
1197 routedesc, nexthop->ifindex, nexthop->vrf_id);
1198 }
1199 }
1200
1201 /* This function takes a nexthop as argument and
1202 * appends to the given rtattr/rtnexthop pair the
1203 * representation of the nexthop. If the nexthop
1204 * defines a preferred source, the src parameter
1205 * will be modified to point to that src, otherwise
1206 * it will be kept unmodified.
1207 *
1208 * @param routedesc: Human readable description of route type
1209 * (direct/recursive, single-/multipath)
1210 * @param bytelen: Length of addresses in bytes.
1211 * @param nexthop: Nexthop information
1212 * @param rta: rtnetlink attribute structure
1213 * @param rtnh: pointer to an rtnetlink nexthop structure
1214 * @param src: pointer pointing to a location where
1215 * the prefsrc should be stored.
1216 */
1217 static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
1218 const struct nexthop *nexthop,
1219 struct rtattr *rta,
1220 struct rtnexthop *rtnh,
1221 struct rtmsg *rtmsg,
1222 const union g_addr **src)
1223 {
1224 struct mpls_label_stack *nh_label;
1225 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1226 int num_labels = 0;
1227 char label_buf[256];
1228
1229 rtnh->rtnh_len = sizeof(*rtnh);
1230 rtnh->rtnh_flags = 0;
1231 rtnh->rtnh_hops = 0;
1232 rta->rta_len += rtnh->rtnh_len;
1233
1234 /*
1235 * label_buf is *only* currently used within debugging.
1236 * As such when we assign it we are guarding it inside
1237 * a debug test. If you want to change this make sure
1238 * you fix this assumption
1239 */
1240 label_buf[0] = '\0';
1241
1242 assert(nexthop);
1243 char label_buf1[20];
1244
1245 nh_label = nexthop->nh_label;
1246
1247 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1248 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1249 continue;
1250
1251 if (IS_ZEBRA_DEBUG_KERNEL) {
1252 if (!num_labels)
1253 sprintf(label_buf, "label %u",
1254 nh_label->label[i]);
1255 else {
1256 sprintf(label_buf1, "/%u", nh_label->label[i]);
1257 strlcat(label_buf, label_buf1,
1258 sizeof(label_buf));
1259 }
1260 }
1261
1262 out_lse[num_labels] =
1263 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1264 num_labels++;
1265 }
1266
1267 if (num_labels) {
1268 /* Set the BoS bit */
1269 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1270
1271 if (rtmsg->rtm_family == AF_MPLS) {
1272 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1273 &out_lse,
1274 num_labels * sizeof(mpls_lse_t));
1275 rtnh->rtnh_len +=
1276 RTA_LENGTH(num_labels * sizeof(mpls_lse_t));
1277 } else {
1278 struct rtattr *nest;
1279 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
1280 int len = rta->rta_len;
1281
1282 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_ENCAP_TYPE,
1283 &encap, sizeof(uint16_t));
1284 nest = rta_nest(rta, NL_PKT_BUF_SIZE, RTA_ENCAP);
1285 rta_addattr_l(rta, NL_PKT_BUF_SIZE, MPLS_IPTUNNEL_DST,
1286 &out_lse,
1287 num_labels * sizeof(mpls_lse_t));
1288 rta_nest_end(rta, nest);
1289 rtnh->rtnh_len += rta->rta_len - len;
1290 }
1291 }
1292
1293 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1294 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1295
1296 if (rtmsg->rtm_family == AF_INET
1297 && (nexthop->type == NEXTHOP_TYPE_IPV6
1298 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1299 bytelen = 4;
1300 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1301 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1302 bytelen);
1303 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1304 rtnh->rtnh_ifindex = nexthop->ifindex;
1305
1306 if (nexthop->rmap_src.ipv4.s_addr)
1307 *src = &nexthop->rmap_src;
1308 else if (nexthop->src.ipv4.s_addr)
1309 *src = &nexthop->src;
1310
1311 if (IS_ZEBRA_DEBUG_KERNEL)
1312 zlog_debug(
1313 " 5549: netlink_route_build_multipath() (%s): "
1314 "nexthop via %s %s if %u",
1315 routedesc, ipv4_ll_buf, label_buf,
1316 nexthop->ifindex);
1317 return;
1318 }
1319
1320 if (nexthop->type == NEXTHOP_TYPE_IPV4
1321 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1322 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1323 rta, rtnh, NL_PKT_BUF_SIZE,
1324 bytelen, nexthop);
1325 if (nexthop->rmap_src.ipv4.s_addr)
1326 *src = &nexthop->rmap_src;
1327 else if (nexthop->src.ipv4.s_addr)
1328 *src = &nexthop->src;
1329
1330 if (IS_ZEBRA_DEBUG_KERNEL)
1331 zlog_debug(
1332 "netlink_route_multipath() (%s): "
1333 "nexthop via %s %s if %u",
1334 routedesc, inet_ntoa(nexthop->gate.ipv4),
1335 label_buf, nexthop->ifindex);
1336 }
1337 if (nexthop->type == NEXTHOP_TYPE_IPV6
1338 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1339 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1340 rta, rtnh, NL_PKT_BUF_SIZE,
1341 bytelen, nexthop);
1342
1343 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1344 *src = &nexthop->rmap_src;
1345 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1346 *src = &nexthop->src;
1347
1348 if (IS_ZEBRA_DEBUG_KERNEL)
1349 zlog_debug(
1350 "netlink_route_multipath() (%s): "
1351 "nexthop via %s %s if %u",
1352 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1353 label_buf, nexthop->ifindex);
1354 }
1355
1356 /*
1357 * We have figured out the ifindex so we should always send it
1358 * This is especially useful if we are doing route
1359 * leaking.
1360 */
1361 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1362 rtnh->rtnh_ifindex = nexthop->ifindex;
1363
1364 /* ifindex */
1365 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1366 if (nexthop->rmap_src.ipv4.s_addr)
1367 *src = &nexthop->rmap_src;
1368 else if (nexthop->src.ipv4.s_addr)
1369 *src = &nexthop->src;
1370
1371 if (IS_ZEBRA_DEBUG_KERNEL)
1372 zlog_debug(
1373 "netlink_route_multipath() (%s): "
1374 "nexthop via if %u",
1375 routedesc, nexthop->ifindex);
1376 }
1377 }
1378
1379 static inline void _netlink_mpls_build_singlepath(const char *routedesc,
1380 const zebra_nhlfe_t *nhlfe,
1381 struct nlmsghdr *nlmsg,
1382 struct rtmsg *rtmsg,
1383 size_t req_size, int cmd)
1384 {
1385 int bytelen;
1386 uint8_t family;
1387
1388 family = NHLFE_FAMILY(nhlfe);
1389 bytelen = (family == AF_INET ? 4 : 16);
1390 _netlink_route_build_singlepath(routedesc, bytelen, nhlfe->nexthop,
1391 nlmsg, rtmsg, req_size, cmd);
1392 }
1393
1394
1395 static inline void
1396 _netlink_mpls_build_multipath(const char *routedesc, const zebra_nhlfe_t *nhlfe,
1397 struct rtattr *rta, struct rtnexthop *rtnh,
1398 struct rtmsg *rtmsg, const union g_addr **src)
1399 {
1400 int bytelen;
1401 uint8_t family;
1402
1403 family = NHLFE_FAMILY(nhlfe);
1404 bytelen = (family == AF_INET ? 4 : 16);
1405 _netlink_route_build_multipath(routedesc, bytelen, nhlfe->nexthop, rta,
1406 rtnh, rtmsg, src);
1407 }
1408
1409
1410 /* Log debug information for netlink_route_multipath
1411 * if debug logging is enabled.
1412 *
1413 * @param cmd: Netlink command which is to be processed
1414 * @param p: Prefix for which the change is due
1415 * @param family: Address family which the change concerns
1416 * @param zvrf: The vrf we are in
1417 * @param tableid: The table we are working on
1418 */
1419 static void _netlink_route_debug(int cmd, const struct prefix *p,
1420 int family, vrf_id_t vrfid,
1421 uint32_t tableid)
1422 {
1423 if (IS_ZEBRA_DEBUG_KERNEL) {
1424 char buf[PREFIX_STRLEN];
1425 zlog_debug(
1426 "netlink_route_multipath(): %s %s vrf %u(%u)",
1427 nl_msg_type_to_str(cmd),
1428 prefix2str(p, buf, sizeof(buf)),
1429 vrfid, tableid);
1430 }
1431 }
1432
1433 static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
1434 {
1435 if (IS_ZEBRA_DEBUG_KERNEL)
1436 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1437 nl_msg_type_to_str(cmd), label);
1438 }
1439
1440 static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
1441 int llalen, ns_id_t ns_id)
1442 {
1443 struct {
1444 struct nlmsghdr n;
1445 struct ndmsg ndm;
1446 char buf[256];
1447 } req;
1448
1449 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
1450
1451 memset(&req, 0, sizeof(req));
1452
1453 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1454 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1455 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1456 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1457
1458 req.ndm.ndm_family = AF_INET;
1459 req.ndm.ndm_state = NUD_PERMANENT;
1460 req.ndm.ndm_ifindex = ifindex;
1461 req.ndm.ndm_type = RTN_UNICAST;
1462
1463 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1464 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1465
1466 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1467 0);
1468 }
1469
1470 /*
1471 * Routing table change via netlink interface, using a dataplane context object
1472 */
1473 static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx)
1474 {
1475 int bytelen;
1476 struct nexthop *nexthop = NULL;
1477 unsigned int nexthop_num;
1478 int family;
1479 const char *routedesc;
1480 int setsrc = 0;
1481 union g_addr src;
1482 const struct prefix *p, *src_p;
1483 uint32_t table_id;
1484
1485 struct {
1486 struct nlmsghdr n;
1487 struct rtmsg r;
1488 char buf[NL_PKT_BUF_SIZE];
1489 } req;
1490
1491 p = dplane_ctx_get_dest(ctx);
1492 src_p = dplane_ctx_get_src(ctx);
1493
1494 family = PREFIX_FAMILY(p);
1495
1496 memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE);
1497
1498 bytelen = (family == AF_INET ? 4 : 16);
1499
1500 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1501 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1502
1503 if ((cmd == RTM_NEWROUTE) &&
1504 ((p->family == AF_INET) || v6_rr_semantics))
1505 req.n.nlmsg_flags |= NLM_F_REPLACE;
1506
1507 req.n.nlmsg_type = cmd;
1508
1509 req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
1510
1511 req.r.rtm_family = family;
1512 req.r.rtm_dst_len = p->prefixlen;
1513 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1514 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1515
1516 if (cmd == RTM_DELROUTE)
1517 req.r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
1518 else
1519 req.r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
1520
1521 /*
1522 * blackhole routes are not RTN_UNICAST, they are
1523 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
1524 * so setting this value as a RTN_UNICAST would
1525 * cause the route lookup of just the prefix
1526 * to fail. So no need to specify this for
1527 * the RTM_DELROUTE case
1528 */
1529 if (cmd != RTM_DELROUTE)
1530 req.r.rtm_type = RTN_UNICAST;
1531
1532 addattr_l(&req.n, sizeof(req), RTA_DST, &p->u.prefix, bytelen);
1533 if (src_p)
1534 addattr_l(&req.n, sizeof(req), RTA_SRC, &src_p->u.prefix,
1535 bytelen);
1536
1537 /* Metric. */
1538 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1539 * used
1540 * either by the kernel or by zebra. Its purely for calculating best
1541 * path(s)
1542 * by the routing protocol and for communicating with protocol peers.
1543 */
1544 addattr32(&req.n, sizeof(req), RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
1545
1546 #if defined(SUPPORT_REALMS)
1547 {
1548 route_tag_t tag;
1549
1550 if (cmd == RTM_DELROUTE)
1551 tag = dplane_ctx_get_old_tag(ctx);
1552 else
1553 tag = dplane_ctx_get_tag(ctx);
1554
1555 if (tag > 0 && tag <= 255)
1556 addattr32(&req.n, sizeof(req), RTA_FLOW, tag);
1557 }
1558 #endif
1559 /* Table corresponding to this route. */
1560 table_id = dplane_ctx_get_table(ctx);
1561 if (table_id < 256)
1562 req.r.rtm_table = table_id;
1563 else {
1564 req.r.rtm_table = RT_TABLE_UNSPEC;
1565 addattr32(&req.n, sizeof(req), RTA_TABLE, table_id);
1566 }
1567
1568 _netlink_route_debug(cmd, p, family, dplane_ctx_get_vrf(ctx), table_id);
1569
1570 /*
1571 * If we are not updating the route and we have received
1572 * a route delete, then all we need to fill in is the
1573 * prefix information to tell the kernel to schwack
1574 * it.
1575 */
1576 if (cmd == RTM_DELROUTE)
1577 goto skip;
1578
1579 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
1580 char buf[NL_PKT_BUF_SIZE];
1581 struct rtattr *rta = (void *)buf;
1582 uint32_t mtu = dplane_ctx_get_mtu(ctx);
1583 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
1584
1585 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
1586 mtu = nexthop_mtu;
1587 rta->rta_type = RTA_METRICS;
1588 rta->rta_len = RTA_LENGTH(0);
1589 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1590 RTAX_MTU, &mtu, sizeof(mtu));
1591 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1592 RTA_PAYLOAD(rta));
1593 }
1594
1595 /* Count overall nexthops so we can decide whether to use singlepath
1596 * or multipath case.
1597 */
1598 nexthop_num = 0;
1599 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1600 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1601 continue;
1602 if (cmd == RTM_NEWROUTE && !NEXTHOP_IS_ACTIVE(nexthop->flags))
1603 continue;
1604
1605 nexthop_num++;
1606 }
1607
1608 /* Singlepath case. */
1609 if (nexthop_num == 1) {
1610 nexthop_num = 0;
1611 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1612 /*
1613 * So we want to cover 2 types of blackhole
1614 * routes here:
1615 * 1) A normal blackhole route( ala from a static
1616 * install.
1617 * 2) A recursively resolved blackhole route
1618 */
1619 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1620 switch (nexthop->bh_type) {
1621 case BLACKHOLE_ADMINPROHIB:
1622 req.r.rtm_type = RTN_PROHIBIT;
1623 break;
1624 case BLACKHOLE_REJECT:
1625 req.r.rtm_type = RTN_UNREACHABLE;
1626 break;
1627 default:
1628 req.r.rtm_type = RTN_BLACKHOLE;
1629 break;
1630 }
1631 goto skip;
1632 }
1633 if (CHECK_FLAG(nexthop->flags,
1634 NEXTHOP_FLAG_RECURSIVE)) {
1635
1636 if (setsrc)
1637 continue;
1638
1639 if (family == AF_INET) {
1640 if (nexthop->rmap_src.ipv4.s_addr
1641 != 0) {
1642 src.ipv4 =
1643 nexthop->rmap_src.ipv4;
1644 setsrc = 1;
1645 } else if (nexthop->src.ipv4.s_addr
1646 != 0) {
1647 src.ipv4 =
1648 nexthop->src.ipv4;
1649 setsrc = 1;
1650 }
1651 } else if (family == AF_INET6) {
1652 if (!IN6_IS_ADDR_UNSPECIFIED(
1653 &nexthop->rmap_src.ipv6)) {
1654 src.ipv6 =
1655 nexthop->rmap_src.ipv6;
1656 setsrc = 1;
1657 } else if (
1658 !IN6_IS_ADDR_UNSPECIFIED(
1659 &nexthop->src.ipv6)) {
1660 src.ipv6 =
1661 nexthop->src.ipv6;
1662 setsrc = 1;
1663 }
1664 }
1665 continue;
1666 }
1667
1668 if ((cmd == RTM_NEWROUTE
1669 && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
1670 routedesc = nexthop->rparent
1671 ? "recursive, single-path"
1672 : "single-path";
1673
1674 _netlink_route_build_singlepath(
1675 routedesc, bytelen, nexthop, &req.n,
1676 &req.r, sizeof(req), cmd);
1677 nexthop_num++;
1678 break;
1679 }
1680 }
1681 if (setsrc && (cmd == RTM_NEWROUTE)) {
1682 if (family == AF_INET)
1683 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
1684 &src.ipv4, bytelen);
1685 else if (family == AF_INET6)
1686 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
1687 &src.ipv6, bytelen);
1688 }
1689 } else { /* Multipath case */
1690 char buf[NL_PKT_BUF_SIZE];
1691 struct rtattr *rta = (void *)buf;
1692 struct rtnexthop *rtnh;
1693 const union g_addr *src1 = NULL;
1694
1695 rta->rta_type = RTA_MULTIPATH;
1696 rta->rta_len = RTA_LENGTH(0);
1697 rtnh = RTA_DATA(rta);
1698
1699 nexthop_num = 0;
1700 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1701 if (CHECK_FLAG(nexthop->flags,
1702 NEXTHOP_FLAG_RECURSIVE)) {
1703 /* This only works for IPv4 now */
1704 if (setsrc)
1705 continue;
1706
1707 if (family == AF_INET) {
1708 if (nexthop->rmap_src.ipv4.s_addr
1709 != 0) {
1710 src.ipv4 =
1711 nexthop->rmap_src.ipv4;
1712 setsrc = 1;
1713 } else if (nexthop->src.ipv4.s_addr
1714 != 0) {
1715 src.ipv4 =
1716 nexthop->src.ipv4;
1717 setsrc = 1;
1718 }
1719 } else if (family == AF_INET6) {
1720 if (!IN6_IS_ADDR_UNSPECIFIED(
1721 &nexthop->rmap_src.ipv6)) {
1722 src.ipv6 =
1723 nexthop->rmap_src.ipv6;
1724 setsrc = 1;
1725 } else if (
1726 !IN6_IS_ADDR_UNSPECIFIED(
1727 &nexthop->src.ipv6)) {
1728 src.ipv6 =
1729 nexthop->src.ipv6;
1730 setsrc = 1;
1731 }
1732 }
1733
1734 continue;
1735 }
1736
1737 if ((cmd == RTM_NEWROUTE
1738 && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
1739 routedesc = nexthop->rparent
1740 ? "recursive, multipath"
1741 : "multipath";
1742 nexthop_num++;
1743
1744 _netlink_route_build_multipath(
1745 routedesc, bytelen, nexthop, rta, rtnh,
1746 &req.r, &src1);
1747 rtnh = RTNH_NEXT(rtnh);
1748
1749 if (!setsrc && src1) {
1750 if (family == AF_INET)
1751 src.ipv4 = src1->ipv4;
1752 else if (family == AF_INET6)
1753 src.ipv6 = src1->ipv6;
1754
1755 setsrc = 1;
1756 }
1757 }
1758 }
1759 if (setsrc && (cmd == RTM_NEWROUTE)) {
1760 if (family == AF_INET)
1761 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
1762 &src.ipv4, bytelen);
1763 else if (family == AF_INET6)
1764 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
1765 &src.ipv6, bytelen);
1766 if (IS_ZEBRA_DEBUG_KERNEL)
1767 zlog_debug("Setting source");
1768 }
1769
1770 if (rta->rta_len > RTA_LENGTH(0))
1771 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1772 RTA_DATA(rta), RTA_PAYLOAD(rta));
1773 }
1774
1775 /* If there is no useful nexthop then return. */
1776 if (nexthop_num == 0) {
1777 if (IS_ZEBRA_DEBUG_KERNEL)
1778 zlog_debug(
1779 "netlink_route_multipath(): No useful nexthop.");
1780 return 0;
1781 }
1782
1783 skip:
1784 /* Talk to netlink socket. */
1785 return netlink_talk_info(netlink_talk_filter, &req.n,
1786 dplane_ctx_get_ns(ctx), 0);
1787 }
1788
1789 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
1790 {
1791 uint32_t actual_table;
1792 int suc = 0;
1793 struct mcast_route_data *mr = (struct mcast_route_data *)in;
1794 struct {
1795 struct nlmsghdr n;
1796 struct ndmsg ndm;
1797 char buf[256];
1798 } req;
1799
1800 mroute = mr;
1801 struct zebra_ns *zns;
1802
1803 zns = zvrf->zns;
1804 memset(&req, 0, sizeof(req));
1805
1806 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1807 req.n.nlmsg_flags = NLM_F_REQUEST;
1808 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1809
1810 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1811 req.n.nlmsg_type = RTM_GETROUTE;
1812
1813 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1814 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1815 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1816 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
1817 /*
1818 * What?
1819 *
1820 * So during the namespace cleanup we started storing
1821 * the zvrf table_id for the default table as RT_TABLE_MAIN
1822 * which is what the normal routing table for ip routing is.
1823 * This change caused this to break our lookups of sg data
1824 * because prior to this change the zvrf->table_id was 0
1825 * and when the pim multicast kernel code saw a 0,
1826 * it was auto-translated to RT_TABLE_DEFAULT. But since
1827 * we are now passing in RT_TABLE_MAIN there is no auto-translation
1828 * and the kernel goes screw you and the delicious cookies you
1829 * are trying to give me. So now we have this little hack.
1830 */
1831 actual_table = (zvrf->table_id == RT_TABLE_MAIN) ? RT_TABLE_DEFAULT :
1832 zvrf->table_id;
1833 addattr_l(&req.n, sizeof(req), RTA_TABLE, &actual_table, 4);
1834
1835 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1836 &zns->netlink_cmd, zns, 0);
1837
1838 mroute = NULL;
1839 return suc;
1840 }
1841
1842 /*
1843 * Update or delete a prefix from the kernel,
1844 * using info from a dataplane context.
1845 */
1846 enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
1847 {
1848 int cmd, ret;
1849 const struct prefix *p = dplane_ctx_get_dest(ctx);
1850 struct nexthop *nexthop;
1851
1852 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
1853 cmd = RTM_DELROUTE;
1854 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
1855 cmd = RTM_NEWROUTE;
1856 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
1857
1858 if (p->family == AF_INET || v6_rr_semantics) {
1859 /* Single 'replace' operation */
1860 cmd = RTM_NEWROUTE;
1861
1862 /*
1863 * With route replace semantics in place
1864 * for v4 routes and the new route is a system
1865 * route we do not install anything.
1866 * The problem here is that the new system
1867 * route should cause us to withdraw from
1868 * the kernel the old non-system route
1869 */
1870 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)) &&
1871 !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
1872 (void)netlink_route_multipath(RTM_DELROUTE,
1873 ctx);
1874 } else {
1875 /*
1876 * So v6 route replace semantics are not in
1877 * the kernel at this point as I understand it.
1878 * so let's do a delete then an add.
1879 * In the future once v6 route replace semantics
1880 * are in we can figure out what to do here to
1881 * allow working with old and new kernels.
1882 *
1883 * I'm also intentionally ignoring the failure case
1884 * of the route delete. If that happens yeah we're
1885 * screwed.
1886 */
1887 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
1888 (void)netlink_route_multipath(RTM_DELROUTE,
1889 ctx);
1890 cmd = RTM_NEWROUTE;
1891 }
1892
1893 } else {
1894 return ZEBRA_DPLANE_REQUEST_FAILURE;
1895 }
1896
1897 if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
1898 ret = netlink_route_multipath(cmd, ctx);
1899 else
1900 ret = 0;
1901 if ((cmd == RTM_NEWROUTE) && (ret == 0)) {
1902 /* Update installed nexthops to signal which have been
1903 * installed.
1904 */
1905 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1906 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1907 continue;
1908
1909 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
1910 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
1911 }
1912 }
1913 }
1914
1915 return (ret == 0 ?
1916 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
1917 }
1918
1919 int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
1920 int llalen, ns_id_t ns_id)
1921 {
1922 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
1923 addr, lla, llalen, ns_id);
1924 }
1925
1926 /*
1927 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
1928 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
1929 */
1930 static int netlink_vxlan_flood_list_update(struct interface *ifp,
1931 struct in_addr *vtep_ip, int cmd)
1932 {
1933 struct zebra_ns *zns;
1934 struct {
1935 struct nlmsghdr n;
1936 struct ndmsg ndm;
1937 char buf[256];
1938 } req;
1939 uint8_t dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1940 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id);
1941
1942 zns = zvrf->zns;
1943 memset(&req, 0, sizeof(req));
1944
1945 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1946 req.n.nlmsg_flags = NLM_F_REQUEST;
1947 if (cmd == RTM_NEWNEIGH)
1948 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
1949 req.n.nlmsg_type = cmd;
1950 req.ndm.ndm_family = PF_BRIDGE;
1951 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
1952 req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
1953
1954
1955 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
1956 req.ndm.ndm_ifindex = ifp->ifindex;
1957 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip->s_addr, 4);
1958
1959 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1960 0);
1961 }
1962
1963 /*
1964 * Add remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1965 * adding
1966 * a "flood" MAC FDB entry.
1967 */
1968 int kernel_add_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1969 {
1970 if (IS_ZEBRA_DEBUG_VXLAN)
1971 zlog_debug("Install %s into flood list for VNI %u intf %s(%u)",
1972 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1973
1974 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_NEWNEIGH);
1975 }
1976
1977 /*
1978 * Remove remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1979 * deleting the "flood" MAC FDB entry.
1980 */
1981 int kernel_del_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1982 {
1983 if (IS_ZEBRA_DEBUG_VXLAN)
1984 zlog_debug(
1985 "Uninstall %s from flood list for VNI %u intf %s(%u)",
1986 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1987
1988 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_DELNEIGH);
1989 }
1990
/*
 * Fallback when the system headers do not define NDA_RTA: address of the
 * first rtattr, located after the aligned struct ndmsg.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
1995
1996 static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
1997 {
1998 struct ndmsg *ndm;
1999 struct interface *ifp;
2000 struct zebra_if *zif;
2001 struct rtattr *tb[NDA_MAX + 1];
2002 struct interface *br_if;
2003 struct ethaddr mac;
2004 vlanid_t vid = 0;
2005 struct prefix vtep_ip;
2006 int vid_present = 0, dst_present = 0;
2007 char buf[ETHER_ADDR_STRLEN];
2008 char vid_buf[20];
2009 char dst_buf[30];
2010 bool sticky;
2011
2012 ndm = NLMSG_DATA(h);
2013
2014 /* We only process macfdb notifications if EVPN is enabled */
2015 if (!is_evpn_enabled())
2016 return 0;
2017
2018 /* The interface should exist. */
2019 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
2020 ndm->ndm_ifindex);
2021 if (!ifp || !ifp->info) {
2022 if (IS_ZEBRA_DEBUG_KERNEL)
2023 zlog_debug("\t%s without associated interface: %u",
2024 __PRETTY_FUNCTION__, ndm->ndm_ifindex);
2025 return 0;
2026 }
2027
2028 /* The interface should be something we're interested in. */
2029 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) {
2030 if (IS_ZEBRA_DEBUG_KERNEL)
2031 zlog_debug("\t%s Not interested in %s, not a slave",
2032 __PRETTY_FUNCTION__, ifp->name);
2033 return 0;
2034 }
2035
2036 /* Drop "permanent" entries. */
2037 if (ndm->ndm_state & NUD_PERMANENT) {
2038 if (IS_ZEBRA_DEBUG_KERNEL)
2039 zlog_debug("\t%s Entry is PERMANENT, dropping",
2040 __PRETTY_FUNCTION__);
2041 return 0;
2042 }
2043
2044 zif = (struct zebra_if *)ifp->info;
2045 if ((br_if = zif->brslave_info.br_if) == NULL) {
2046 if (IS_ZEBRA_DEBUG_KERNEL)
2047 zlog_debug(
2048 "%s family %s IF %s(%u) brIF %u - no bridge master",
2049 nl_msg_type_to_str(h->nlmsg_type),
2050 nl_family_to_str(ndm->ndm_family), ifp->name,
2051 ndm->ndm_ifindex,
2052 zif->brslave_info.bridge_ifindex);
2053 return 0;
2054 }
2055
2056 /* Parse attributes and extract fields of interest. */
2057 memset(tb, 0, sizeof tb);
2058 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2059
2060 if (!tb[NDA_LLADDR]) {
2061 if (IS_ZEBRA_DEBUG_KERNEL)
2062 zlog_debug("%s family %s IF %s(%u) brIF %u - no LLADDR",
2063 nl_msg_type_to_str(h->nlmsg_type),
2064 nl_family_to_str(ndm->ndm_family), ifp->name,
2065 ndm->ndm_ifindex,
2066 zif->brslave_info.bridge_ifindex);
2067 return 0;
2068 }
2069
2070 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
2071 if (IS_ZEBRA_DEBUG_KERNEL)
2072 zlog_debug(
2073 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
2074 nl_msg_type_to_str(h->nlmsg_type),
2075 nl_family_to_str(ndm->ndm_family), ifp->name,
2076 ndm->ndm_ifindex,
2077 zif->brslave_info.bridge_ifindex,
2078 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
2079 return 0;
2080 }
2081
2082 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
2083
2084 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
2085 vid_present = 1;
2086 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
2087 sprintf(vid_buf, " VLAN %u", vid);
2088 }
2089
2090 if (tb[NDA_DST]) {
2091 /* TODO: Only IPv4 supported now. */
2092 dst_present = 1;
2093 vtep_ip.family = AF_INET;
2094 vtep_ip.prefixlen = IPV4_MAX_BITLEN;
2095 memcpy(&(vtep_ip.u.prefix4.s_addr), RTA_DATA(tb[NDA_DST]),
2096 IPV4_MAX_BYTELEN);
2097 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip.u.prefix4));
2098 }
2099
2100 sticky = !!(ndm->ndm_state & NUD_NOARP);
2101
2102 if (IS_ZEBRA_DEBUG_KERNEL)
2103 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
2104 nl_msg_type_to_str(h->nlmsg_type),
2105 nl_family_to_str(ndm->ndm_family), ifp->name,
2106 ndm->ndm_ifindex, vid_present ? vid_buf : "",
2107 sticky ? "sticky " : "",
2108 prefix_mac2str(&mac, buf, sizeof(buf)),
2109 dst_present ? dst_buf : "");
2110
2111 if (filter_vlan && vid != filter_vlan) {
2112 if (IS_ZEBRA_DEBUG_KERNEL)
2113 zlog_debug("\tFiltered due to filter vlan: %d",
2114 filter_vlan);
2115 return 0;
2116 }
2117
2118 /* If add or update, do accordingly if learnt on a "local" interface; if
2119 * the notification is over VxLAN, this has to be related to
2120 * multi-homing,
2121 * so perform an implicit delete of any local entry (if it exists).
2122 */
2123 if (h->nlmsg_type == RTM_NEWNEIGH) {
2124 if (IS_ZEBRA_IF_VXLAN(ifp))
2125 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
2126 vid);
2127
2128 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
2129 sticky);
2130 }
2131
2132 /* This is a delete notification.
2133 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
2134 * 2. For a MAC over "local" interface, delete the mac
2135 * Note: We will get notifications from both bridge driver and VxLAN
2136 * driver.
2137 * Ignore the notification from VxLan driver as it is also generated
2138 * when mac moves from remote to local.
2139 */
2140 if (dst_present) {
2141 if (IS_ZEBRA_DEBUG_KERNEL)
2142 zlog_debug("\tNo Destination Present");
2143 return 0;
2144 }
2145
2146 if (IS_ZEBRA_IF_VXLAN(ifp))
2147 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
2148 vid);
2149
2150 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
2151 }
2152
2153 static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2154 {
2155 int len;
2156 struct ndmsg *ndm;
2157
2158 if (h->nlmsg_type != RTM_NEWNEIGH)
2159 return 0;
2160
2161 /* Length validity. */
2162 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2163 if (len < 0)
2164 return -1;
2165
2166 /* We are interested only in AF_BRIDGE notifications. */
2167 ndm = NLMSG_DATA(h);
2168 if (ndm->ndm_family != AF_BRIDGE)
2169 return 0;
2170
2171 return netlink_macfdb_change(h, len, ns_id);
2172 }
2173
2174 /* Request for MAC FDB information from the kernel */
2175 static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
2176 int type, ifindex_t master_ifindex)
2177 {
2178 struct {
2179 struct nlmsghdr n;
2180 struct ifinfomsg ifm;
2181 char buf[256];
2182 } req;
2183
2184 /* Form the request, specifying filter (rtattr) if needed. */
2185 memset(&req, 0, sizeof(req));
2186 req.n.nlmsg_type = type;
2187 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
2188 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
2189 req.ifm.ifi_family = family;
2190 if (master_ifindex)
2191 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
2192
2193 return netlink_request(netlink_cmd, &req.n);
2194 }
2195
2196 /*
2197 * MAC forwarding database read using netlink interface. This is invoked
2198 * at startup.
2199 */
2200 int netlink_macfdb_read(struct zebra_ns *zns)
2201 {
2202 int ret;
2203 struct zebra_dplane_info dp_info;
2204
2205 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2206
2207 /* Get bridge FDB table. */
2208 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
2209 0);
2210 if (ret < 0)
2211 return ret;
2212 /* We are reading entire table. */
2213 filter_vlan = 0;
2214 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2215 &dp_info, 0, 1);
2216
2217 return ret;
2218 }
2219
2220 /*
2221 * MAC forwarding database read using netlink interface. This is for a
2222 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
2223 */
2224 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
2225 struct interface *br_if)
2226 {
2227 struct zebra_if *br_zif;
2228 struct zebra_if *zif;
2229 struct zebra_l2info_vxlan *vxl;
2230 struct zebra_dplane_info dp_info;
2231 int ret = 0;
2232
2233 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2234
2235 /* Save VLAN we're filtering on, if needed. */
2236 br_zif = (struct zebra_if *)br_if->info;
2237 zif = (struct zebra_if *)ifp->info;
2238 vxl = &zif->l2info.vxl;
2239 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
2240 filter_vlan = vxl->access_vlan;
2241
2242 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
2243 */
2244 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
2245 br_if->ifindex);
2246 if (ret < 0)
2247 return ret;
2248 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2249 &dp_info, 0, 0);
2250
2251 /* Reset VLAN filter. */
2252 filter_vlan = 0;
2253 return ret;
2254 }
2255
2256
2257 /* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
2258 static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
2259 int family,
2260 int type,
2261 struct interface *br_if,
2262 struct ethaddr *mac,
2263 vlanid_t vid)
2264 {
2265 struct {
2266 struct nlmsghdr n;
2267 struct ndmsg ndm;
2268 char buf[256];
2269 } req;
2270 struct zebra_if *br_zif;
2271 char buf[ETHER_ADDR_STRLEN];
2272
2273 memset(&req, 0, sizeof(req));
2274 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2275 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
2276 req.n.nlmsg_flags = NLM_F_REQUEST;
2277 req.ndm.ndm_family = family; /* AF_BRIDGE */
2278 /* req.ndm.ndm_state = NUD_REACHABLE; */
2279
2280 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2281
2282 br_zif = (struct zebra_if *)br_if->info;
2283 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
2284 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2285
2286 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2287
2288 if (IS_ZEBRA_DEBUG_KERNEL)
2289 zlog_debug("%s: Tx family %s IF %s(%u) MAC %s vid %u",
2290 __PRETTY_FUNCTION__,
2291 nl_family_to_str(req.ndm.ndm_family), br_if->name,
2292 br_if->ifindex,
2293 prefix_mac2str(mac, buf, sizeof(buf)), vid);
2294
2295 return netlink_request(&zns->netlink_cmd, &req.n);
2296 }
2297
2298 int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
2299 struct interface *br_if,
2300 struct ethaddr *mac, vlanid_t vid)
2301 {
2302 int ret = 0;
2303 struct zebra_dplane_info dp_info;
2304
2305 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2306
2307 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
2308 */
2309 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
2310 RTM_GETNEIGH,
2311 br_if, mac, vid);
2312 if (ret < 0)
2313 return ret;
2314
2315 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2316 &dp_info, 1, 0);
2317
2318 return ret;
2319 }
2320
2321 /*
2322 * Netlink-specific handler for MAC updates using dataplane context object.
2323 */
2324 static enum zebra_dplane_result
2325 netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx)
2326 {
2327 struct {
2328 struct nlmsghdr n;
2329 struct ndmsg ndm;
2330 char buf[256];
2331 } req;
2332 int ret;
2333 int dst_alen;
2334 struct zebra_if *zif;
2335 struct interface *br_if;
2336 struct zebra_if *br_zif;
2337 int vid_present = 0;
2338 char vid_buf[20];
2339 struct zebra_ns *zns;
2340 struct interface *ifp;
2341 int cmd;
2342 struct in_addr vtep_ip;
2343 vlanid_t vid;
2344
2345 if (dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL)
2346 cmd = RTM_NEWNEIGH;
2347 else
2348 cmd = RTM_DELNEIGH;
2349
2350 /* Locate zebra ns and interface objects from context data */
2351 zns = zebra_ns_lookup(dplane_ctx_get_ns(ctx)->ns_id);
2352 if (zns == NULL) {
2353 /* Nothing to be done */
2354 if (IS_ZEBRA_DEBUG_KERNEL)
2355 zlog_debug("MAC %s on IF %s(%u) - zebra ns unknown",
2356 (cmd == RTM_NEWNEIGH) ? "add" : "del",
2357 dplane_ctx_get_ifname(ctx),
2358 dplane_ctx_get_ifindex(ctx));
2359
2360 return ZEBRA_DPLANE_REQUEST_FAILURE;
2361 }
2362
2363 ifp = if_lookup_by_index_per_ns(zns, dplane_ctx_get_ifindex(ctx));
2364 if (ifp == NULL) {
2365 /* Nothing to be done */
2366 /* Nothing to be done */
2367 if (IS_ZEBRA_DEBUG_KERNEL)
2368 zlog_debug("MAC %s on IF %s(%u) - interface unknown",
2369 (cmd == RTM_NEWNEIGH) ? "add" : "del",
2370 dplane_ctx_get_ifname(ctx),
2371 dplane_ctx_get_ifindex(ctx));
2372 return ZEBRA_DPLANE_REQUEST_FAILURE;
2373 }
2374
2375 vid = dplane_ctx_mac_get_vlan(ctx);
2376
2377 zif = ifp->info;
2378 if ((br_if = zif->brslave_info.br_if) == NULL) {
2379 if (IS_ZEBRA_DEBUG_KERNEL)
2380 zlog_debug("MAC %s on IF %s(%u) - no mapping to bridge",
2381 (cmd == RTM_NEWNEIGH) ? "add" : "del",
2382 ifp->name, ifp->ifindex);
2383 return ZEBRA_DPLANE_REQUEST_FAILURE;
2384 }
2385
2386 memset(&req, 0, sizeof(req));
2387
2388 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2389 req.n.nlmsg_flags = NLM_F_REQUEST;
2390 if (cmd == RTM_NEWNEIGH)
2391 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2392 req.n.nlmsg_type = cmd;
2393 req.ndm.ndm_family = AF_BRIDGE;
2394 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2395 req.ndm.ndm_state = NUD_REACHABLE;
2396
2397 if (dplane_ctx_mac_is_sticky(ctx))
2398 req.ndm.ndm_state |= NUD_NOARP;
2399 else
2400 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2401
2402 addattr_l(&req.n, sizeof(req), NDA_LLADDR,
2403 dplane_ctx_mac_get_addr(ctx), 6);
2404 req.ndm.ndm_ifindex = ifp->ifindex;
2405
2406 dst_alen = 4; // TODO: hardcoded
2407 vtep_ip = *(dplane_ctx_mac_get_vtep_ip(ctx));
2408 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
2409
2410 br_zif = (struct zebra_if *)br_if->info;
2411 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) {
2412 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2413 vid_present = 1;
2414 sprintf(vid_buf, " VLAN %u", vid);
2415 }
2416 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2417
2418 if (IS_ZEBRA_DEBUG_KERNEL) {
2419 char ipbuf[PREFIX_STRLEN];
2420 char buf[ETHER_ADDR_STRLEN];
2421 char dst_buf[PREFIX_STRLEN + 10];
2422
2423 inet_ntop(AF_INET, &vtep_ip, ipbuf, sizeof(ipbuf));
2424 snprintf(dst_buf, sizeof(dst_buf), " dst %s", ipbuf);
2425 prefix_mac2str(dplane_ctx_mac_get_addr(ctx), buf, sizeof(buf));
2426
2427 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2428 nl_msg_type_to_str(cmd),
2429 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2430 ifp->ifindex, vid_present ? vid_buf : "",
2431 dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
2432 buf, dst_buf);
2433 }
2434
2435 ret = netlink_talk_info(netlink_talk_filter, &req.n,
2436 dplane_ctx_get_ns(ctx), 0);
2437 if (ret == 0)
2438 return ZEBRA_DPLANE_REQUEST_SUCCESS;
2439 else
2440 return ZEBRA_DPLANE_REQUEST_FAILURE;
2441 }
2442
2443 /*
2444 * In the event the kernel deletes ipv4 link-local neighbor entries created for
2445 * 5549 support, re-install them.
2446 */
2447 static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
2448 struct interface *ifp, struct ipaddr *ip,
2449 bool handle_failed)
2450 {
2451 if (ndm->ndm_family != AF_INET)
2452 return;
2453
2454 if (!zif->v6_2_v4_ll_neigh_entry)
2455 return;
2456
2457 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
2458 return;
2459
2460 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
2461 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
2462 ifp->name);
2463 return;
2464 }
2465
2466 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
2467 }
2468
2469 #define NUD_VALID \
2470 (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE \
2471 | NUD_DELAY)
2472
2473 static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2474 {
2475 struct ndmsg *ndm;
2476 struct interface *ifp;
2477 struct zebra_if *zif;
2478 struct rtattr *tb[NDA_MAX + 1];
2479 struct interface *link_if;
2480 struct ethaddr mac;
2481 struct ipaddr ip;
2482 char buf[ETHER_ADDR_STRLEN];
2483 char buf2[INET6_ADDRSTRLEN];
2484 int mac_present = 0;
2485 bool is_ext;
2486 bool is_router;
2487
2488 ndm = NLMSG_DATA(h);
2489
2490 /* The interface should exist. */
2491 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
2492 ndm->ndm_ifindex);
2493 if (!ifp || !ifp->info)
2494 return 0;
2495
2496 zif = (struct zebra_if *)ifp->info;
2497
2498 /* Parse attributes and extract fields of interest. */
2499 memset(tb, 0, sizeof tb);
2500 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2501
2502 if (!tb[NDA_DST]) {
2503 zlog_debug("%s family %s IF %s(%u) - no DST",
2504 nl_msg_type_to_str(h->nlmsg_type),
2505 nl_family_to_str(ndm->ndm_family), ifp->name,
2506 ndm->ndm_ifindex);
2507 return 0;
2508 }
2509
2510 memset(&ip, 0, sizeof(struct ipaddr));
2511 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
2512 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
2513
2514 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
2515 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
2516 netlink_handle_5549(ndm, zif, ifp, &ip, false);
2517 if (IS_ZEBRA_DEBUG_KERNEL)
2518 zlog_debug(
2519 "\tNeighbor Entry Received is a 5549 entry, finished");
2520 return 0;
2521 }
2522
2523 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
2524 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
2525 netlink_handle_5549(ndm, zif, ifp, &ip, true);
2526
2527 /* The neighbor is present on an SVI. From this, we locate the
2528 * underlying
2529 * bridge because we're only interested in neighbors on a VxLAN bridge.
2530 * The bridge is located based on the nature of the SVI:
2531 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2532 * interface
2533 * and is linked to the bridge
2534 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2535 * inteface
2536 * itself
2537 */
2538 if (IS_ZEBRA_IF_VLAN(ifp)) {
2539 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
2540 zif->link_ifindex);
2541 if (!link_if)
2542 return 0;
2543 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
2544 link_if = ifp;
2545 else {
2546 if (IS_ZEBRA_DEBUG_KERNEL)
2547 zlog_debug(
2548 "\tNeighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
2549 return 0;
2550 }
2551
2552 memset(&mac, 0, sizeof(struct ethaddr));
2553 if (h->nlmsg_type == RTM_NEWNEIGH) {
2554 if (tb[NDA_LLADDR]) {
2555 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
2556 if (IS_ZEBRA_DEBUG_KERNEL)
2557 zlog_debug(
2558 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
2559 nl_msg_type_to_str(
2560 h->nlmsg_type),
2561 nl_family_to_str(
2562 ndm->ndm_family),
2563 ifp->name, ndm->ndm_ifindex,
2564 (unsigned long)RTA_PAYLOAD(
2565 tb[NDA_LLADDR]));
2566 return 0;
2567 }
2568
2569 mac_present = 1;
2570 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
2571 }
2572
2573 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
2574 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
2575
2576 if (IS_ZEBRA_DEBUG_KERNEL)
2577 zlog_debug(
2578 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2579 nl_msg_type_to_str(h->nlmsg_type),
2580 nl_family_to_str(ndm->ndm_family), ifp->name,
2581 ndm->ndm_ifindex,
2582 ipaddr2str(&ip, buf2, sizeof(buf2)),
2583 mac_present
2584 ? prefix_mac2str(&mac, buf, sizeof(buf))
2585 : "",
2586 ndm->ndm_state, ndm->ndm_flags);
2587
2588 /* If the neighbor state is valid for use, process as an add or
2589 * update
2590 * else process as a delete. Note that the delete handling may
2591 * result
2592 * in re-adding the neighbor if it is a valid "remote" neighbor.
2593 */
2594 if (ndm->ndm_state & NUD_VALID)
2595 return zebra_vxlan_handle_kernel_neigh_update(
2596 ifp, link_if, &ip, &mac, ndm->ndm_state,
2597 is_ext, is_router);
2598
2599 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
2600 }
2601
2602 if (IS_ZEBRA_DEBUG_KERNEL)
2603 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2604 nl_msg_type_to_str(h->nlmsg_type),
2605 nl_family_to_str(ndm->ndm_family), ifp->name,
2606 ndm->ndm_ifindex,
2607 ipaddr2str(&ip, buf2, sizeof(buf2)));
2608
2609 /* Process the delete - it may result in re-adding the neighbor if it is
2610 * a valid "remote" neighbor.
2611 */
2612 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
2613 }
2614
2615 static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2616 {
2617 int len;
2618 struct ndmsg *ndm;
2619
2620 if (h->nlmsg_type != RTM_NEWNEIGH)
2621 return 0;
2622
2623 /* Length validity. */
2624 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2625 if (len < 0)
2626 return -1;
2627
2628 /* We are interested only in AF_INET or AF_INET6 notifications. */
2629 ndm = NLMSG_DATA(h);
2630 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
2631 return 0;
2632
2633 return netlink_neigh_change(h, len);
2634 }
2635
2636 /* Request for IP neighbor information from the kernel */
2637 static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
2638 int type, ifindex_t ifindex)
2639 {
2640 struct {
2641 struct nlmsghdr n;
2642 struct ndmsg ndm;
2643 char buf[256];
2644 } req;
2645
2646 /* Form the request, specifying filter (rtattr) if needed. */
2647 memset(&req, 0, sizeof(req));
2648 req.n.nlmsg_type = type;
2649 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
2650 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2651 req.ndm.ndm_family = family;
2652 if (ifindex)
2653 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
2654
2655 return netlink_request(netlink_cmd, &req.n);
2656 }
2657
2658 /*
2659 * IP Neighbor table read using netlink interface. This is invoked
2660 * at startup.
2661 */
2662 int netlink_neigh_read(struct zebra_ns *zns)
2663 {
2664 int ret;
2665 struct zebra_dplane_info dp_info;
2666
2667 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2668
2669 /* Get IP neighbor table. */
2670 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
2671 0);
2672 if (ret < 0)
2673 return ret;
2674 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
2675 &dp_info, 0, 1);
2676
2677 return ret;
2678 }
2679
2680 /*
2681 * IP Neighbor table read using netlink interface. This is for a specific
2682 * VLAN device.
2683 */
2684 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2685 {
2686 int ret = 0;
2687 struct zebra_dplane_info dp_info;
2688
2689 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2690
2691 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
2692 vlan_if->ifindex);
2693 if (ret < 0)
2694 return ret;
2695 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
2696 &dp_info, 0, 0);
2697
2698 return ret;
2699 }
2700
2701 /*
2702 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
2703 * read using netlink interface.
2704 */
2705 static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
2706 int type, struct ipaddr *ip,
2707 ifindex_t ifindex)
2708 {
2709 struct {
2710 struct nlmsghdr n;
2711 struct ndmsg ndm;
2712 char buf[256];
2713 } req;
2714 int ipa_len;
2715
2716 /* Form the request, specifying filter (rtattr) if needed. */
2717 memset(&req, 0, sizeof(req));
2718 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2719 req.n.nlmsg_flags = NLM_F_REQUEST;
2720 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
2721 req.ndm.ndm_ifindex = ifindex;
2722
2723 if (IS_IPADDR_V4(ip)) {
2724 ipa_len = IPV4_MAX_BYTELEN;
2725 req.ndm.ndm_family = AF_INET;
2726
2727 } else {
2728 ipa_len = IPV6_MAX_BYTELEN;
2729 req.ndm.ndm_family = AF_INET6;
2730 }
2731
2732 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2733
2734 return netlink_request(&zns->netlink_cmd, &req.n);
2735 }
2736
2737 int netlink_neigh_read_specific_ip(struct ipaddr *ip,
2738 struct interface *vlan_if)
2739 {
2740 int ret = 0;
2741 struct zebra_ns *zns;
2742 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vlan_if->vrf_id);
2743 char buf[INET6_ADDRSTRLEN];
2744 struct zebra_dplane_info dp_info;
2745
2746 zns = zvrf->zns;
2747
2748 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2749
2750 if (IS_ZEBRA_DEBUG_KERNEL)
2751 zlog_debug("%s: neigh request IF %s(%u) IP %s vrf_id %u",
2752 __PRETTY_FUNCTION__, vlan_if->name,
2753 vlan_if->ifindex,
2754 ipaddr2str(ip, buf, sizeof(buf)),
2755 vlan_if->vrf_id);
2756
2757 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
2758 vlan_if->ifindex);
2759 if (ret < 0)
2760 return ret;
2761
2762 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
2763 &dp_info, 1, 0);
2764
2765 return ret;
2766 }
2767
2768 int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
2769 {
2770 int len;
2771 struct ndmsg *ndm;
2772
2773 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
2774 return 0;
2775
2776 /* Length validity. */
2777 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2778 if (len < 0) {
2779 zlog_err("%s: Message received from netlink is of a broken size %d %zu",
2780 __PRETTY_FUNCTION__, h->nlmsg_len,
2781 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
2782 return -1;
2783 }
2784
2785 /* Is this a notification for the MAC FDB or IP neighbor table? */
2786 ndm = NLMSG_DATA(h);
2787 if (ndm->ndm_family == AF_BRIDGE)
2788 return netlink_macfdb_change(h, len, ns_id);
2789
2790 if (ndm->ndm_type != RTN_UNICAST)
2791 return 0;
2792
2793 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2794 return netlink_ipneigh_change(h, len, ns_id);
2795 else {
2796 flog_warn(
2797 EC_ZEBRA_UNKNOWN_FAMILY,
2798 "Invalid address family: %u received from kernel neighbor change: %s",
2799 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
2800 return 0;
2801 }
2802
2803 return 0;
2804 }
2805
2806 /*
2807 * Utility neighbor-update function, using info from dplane context.
2808 */
2809 static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
2810 int cmd)
2811 {
2812 struct {
2813 struct nlmsghdr n;
2814 struct ndmsg ndm;
2815 char buf[256];
2816 } req;
2817 int ipa_len;
2818 char buf[INET6_ADDRSTRLEN];
2819 char buf2[ETHER_ADDR_STRLEN];
2820 const struct ipaddr *ip;
2821 const struct ethaddr *mac;
2822 uint8_t flags;
2823 uint16_t state;
2824
2825 memset(&req, 0, sizeof(req));
2826
2827 ip = dplane_ctx_neigh_get_ipaddr(ctx);
2828 mac = dplane_ctx_neigh_get_mac(ctx);
2829 if (is_zero_mac(mac))
2830 mac = NULL;
2831
2832 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
2833 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
2834
2835 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2836 req.n.nlmsg_flags = NLM_F_REQUEST;
2837 if (cmd == RTM_NEWNEIGH)
2838 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2839 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
2840 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
2841 req.ndm.ndm_state = state;
2842 req.ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
2843 req.ndm.ndm_type = RTN_UNICAST;
2844 req.ndm.ndm_flags = flags;
2845
2846 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2847 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2848 if (mac)
2849 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2850
2851 if (IS_ZEBRA_DEBUG_KERNEL)
2852 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s flags 0x%x state 0x%x",
2853 nl_msg_type_to_str(cmd),
2854 nl_family_to_str(req.ndm.ndm_family),
2855 dplane_ctx_get_ifname(ctx),
2856 dplane_ctx_get_ifindex(ctx),
2857 ipaddr2str(ip, buf, sizeof(buf)),
2858 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
2859 : "null",
2860 flags, state);
2861
2862 return netlink_talk_info(netlink_talk_filter, &req.n,
2863 dplane_ctx_get_ns(ctx), 0);
2864 }
2865
2866 /*
2867 * Update MAC, using dataplane context object.
2868 */
2869 enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx)
2870 {
2871 return netlink_macfdb_update_ctx(ctx);
2872 }
2873
2874 enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx)
2875 {
2876 int ret = -1;
2877
2878 switch (dplane_ctx_get_op(ctx)) {
2879 case DPLANE_OP_NEIGH_INSTALL:
2880 case DPLANE_OP_NEIGH_UPDATE:
2881 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH);
2882 break;
2883 case DPLANE_OP_NEIGH_DELETE:
2884 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH);
2885 break;
2886 default:
2887 break;
2888 }
2889
2890 return (ret == 0 ?
2891 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
2892 }
2893
2894 /*
2895 * MPLS label forwarding table change via netlink interface, using dataplane
2896 * context information.
2897 */
2898 int netlink_mpls_multipath(int cmd, struct zebra_dplane_ctx *ctx)
2899 {
2900 mpls_lse_t lse;
2901 const zebra_nhlfe_t *nhlfe;
2902 struct nexthop *nexthop = NULL;
2903 unsigned int nexthop_num;
2904 const char *routedesc;
2905 int route_type;
2906
2907 struct {
2908 struct nlmsghdr n;
2909 struct rtmsg r;
2910 char buf[NL_PKT_BUF_SIZE];
2911 } req;
2912
2913 memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE);
2914
2915 /*
2916 * Count # nexthops so we can decide whether to use singlepath
2917 * or multipath case.
2918 */
2919 nexthop_num = 0;
2920 for (nhlfe = dplane_ctx_get_nhlfe(ctx); nhlfe; nhlfe = nhlfe->next) {
2921 nexthop = nhlfe->nexthop;
2922 if (!nexthop)
2923 continue;
2924 if (cmd == RTM_NEWROUTE) {
2925 /* Count all selected NHLFEs */
2926 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2927 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2928 nexthop_num++;
2929 } else { /* DEL */
2930 /* Count all installed NHLFEs */
2931 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
2932 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
2933 nexthop_num++;
2934 }
2935 }
2936
2937 if ((nexthop_num == 0) ||
2938 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
2939 return 0;
2940
2941 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2942 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2943 req.n.nlmsg_type = cmd;
2944 req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
2945
2946 req.r.rtm_family = AF_MPLS;
2947 req.r.rtm_table = RT_TABLE_MAIN;
2948 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
2949 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
2950 req.r.rtm_type = RTN_UNICAST;
2951
2952 if (cmd == RTM_NEWROUTE) {
2953 /* We do a replace to handle update. */
2954 req.n.nlmsg_flags |= NLM_F_REPLACE;
2955
2956 /* set the protocol value if installing */
2957 route_type = re_type_from_lsp_type(
2958 dplane_ctx_get_best_nhlfe(ctx)->type);
2959 req.r.rtm_protocol = zebra2proto(route_type);
2960 }
2961
2962 /* Fill destination */
2963 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
2964 addattr_l(&req.n, sizeof(req), RTA_DST, &lse, sizeof(mpls_lse_t));
2965
2966 /* Fill nexthops (paths) based on single-path or multipath. The paths
2967 * chosen depend on the operation.
2968 */
2969 if (nexthop_num == 1) {
2970 routedesc = "single-path";
2971 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
2972 routedesc);
2973
2974 nexthop_num = 0;
2975 for (nhlfe = dplane_ctx_get_nhlfe(ctx);
2976 nhlfe; nhlfe = nhlfe->next) {
2977 nexthop = nhlfe->nexthop;
2978 if (!nexthop)
2979 continue;
2980
2981 if ((cmd == RTM_NEWROUTE
2982 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2983 && CHECK_FLAG(nexthop->flags,
2984 NEXTHOP_FLAG_ACTIVE)))
2985 || (cmd == RTM_DELROUTE
2986 && (CHECK_FLAG(nhlfe->flags,
2987 NHLFE_FLAG_INSTALLED)
2988 && CHECK_FLAG(nexthop->flags,
2989 NEXTHOP_FLAG_FIB)))) {
2990 /* Add the gateway */
2991 _netlink_mpls_build_singlepath(
2992 routedesc, nhlfe,
2993 &req.n, &req.r,
2994 sizeof(req), cmd);
2995
2996 nexthop_num++;
2997 break;
2998 }
2999 }
3000 } else { /* Multipath case */
3001 char buf[NL_PKT_BUF_SIZE];
3002 struct rtattr *rta = (void *)buf;
3003 struct rtnexthop *rtnh;
3004 const union g_addr *src1 = NULL;
3005
3006 rta->rta_type = RTA_MULTIPATH;
3007 rta->rta_len = RTA_LENGTH(0);
3008 rtnh = RTA_DATA(rta);
3009
3010 routedesc = "multipath";
3011 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
3012 routedesc);
3013
3014 nexthop_num = 0;
3015 for (nhlfe = dplane_ctx_get_nhlfe(ctx);
3016 nhlfe; nhlfe = nhlfe->next) {
3017 nexthop = nhlfe->nexthop;
3018 if (!nexthop)
3019 continue;
3020
3021 if ((cmd == RTM_NEWROUTE
3022 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3023 && CHECK_FLAG(nexthop->flags,
3024 NEXTHOP_FLAG_ACTIVE)))
3025 || (cmd == RTM_DELROUTE
3026 && (CHECK_FLAG(nhlfe->flags,
3027 NHLFE_FLAG_INSTALLED)
3028 && CHECK_FLAG(nexthop->flags,
3029 NEXTHOP_FLAG_FIB)))) {
3030 nexthop_num++;
3031
3032 /* Build the multipath */
3033 _netlink_mpls_build_multipath(routedesc, nhlfe,
3034 rta, rtnh, &req.r,
3035 &src1);
3036 rtnh = RTNH_NEXT(rtnh);
3037 }
3038 }
3039
3040 /* Add the multipath */
3041 if (rta->rta_len > RTA_LENGTH(0))
3042 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
3043 RTA_DATA(rta), RTA_PAYLOAD(rta));
3044 }
3045
3046 /* Talk to netlink socket. */
3047 return netlink_talk_info(netlink_talk_filter, &req.n,
3048 dplane_ctx_get_ns(ctx), 0);
3049 }
3050 #endif /* HAVE_NETLINK */