]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
Merge pull request #7220 from idryzhov/fix-clear-isis
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 #include <net/if_arp.h>
26 #include <linux/lwtunnel.h>
27 #include <linux/mpls_iptunnel.h>
28 #include <linux/neighbour.h>
29 #include <linux/rtnetlink.h>
30 #include <linux/nexthop.h>
31
32 /* Hack for GNU libc version 2. */
33 #ifndef MSG_TRUNC
34 #define MSG_TRUNC 0x20
35 #endif /* MSG_TRUNC */
36
37 #include "linklist.h"
38 #include "if.h"
39 #include "log.h"
40 #include "prefix.h"
41 #include "connected.h"
42 #include "table.h"
43 #include "memory.h"
44 #include "zebra_memory.h"
45 #include "rib.h"
46 #include "thread.h"
47 #include "privs.h"
48 #include "nexthop.h"
49 #include "vrf.h"
50 #include "vty.h"
51 #include "mpls.h"
52 #include "vxlan.h"
53 #include "printfrr.h"
54
55 #include "zebra/zapi_msg.h"
56 #include "zebra/zebra_ns.h"
57 #include "zebra/zebra_vrf.h"
58 #include "zebra/rt.h"
59 #include "zebra/redistribute.h"
60 #include "zebra/interface.h"
61 #include "zebra/debug.h"
62 #include "zebra/rtadv.h"
63 #include "zebra/zebra_ptm.h"
64 #include "zebra/zebra_mpls.h"
65 #include "zebra/kernel_netlink.h"
66 #include "zebra/rt_netlink.h"
67 #include "zebra/zebra_nhg.h"
68 #include "zebra/zebra_mroute.h"
69 #include "zebra/zebra_vxlan.h"
70 #include "zebra/zebra_errors.h"
71 #include "zebra/zebra_evpn_mh.h"
72
73 #ifndef AF_MPLS
74 #define AF_MPLS 28
75 #endif
76
77 /* Re-defining as I am unable to include <linux/if_bridge.h> which has the
78 * UAPI for MAC sync. */
79 #ifndef _UAPI_LINUX_IF_BRIDGE_H
80 /* FDB notification bits for NDA_NOTIFY:
81 * - BR_FDB_NFY_STATIC - notify on activity/expire even for a static entry
82 * - BR_FDB_NFY_INACTIVE - mark as inactive to avoid double notification,
83 * used with BR_FDB_NFY_STATIC (kernel controlled)
84 */
85 enum {
86 BR_FDB_NFY_STATIC,
87 BR_FDB_NFY_INACTIVE,
88 BR_FDB_NFY_MAX
89 };
90 #endif
91
92 static vlanid_t filter_vlan = 0;
93
94 /* We capture whether the current kernel supports nexthop ids; by
95 * default, we'll use them if possible. There's also a configuration
96 * available to _disable_ use of kernel nexthops.
97 */
98 static bool supports_nh;
99
100 struct gw_family_t {
101 uint16_t filler;
102 uint16_t family;
103 union g_addr gate;
104 };
105
106 static const char ipv4_ll_buf[16] = "169.254.0.1";
107 static struct in_addr ipv4_ll;
108
109 /* Is this a ipv4 over ipv6 route? */
110 static bool is_route_v4_over_v6(unsigned char rtm_family,
111 enum nexthop_types_t nexthop_type)
112 {
113 if (rtm_family == AF_INET
114 && (nexthop_type == NEXTHOP_TYPE_IPV6
115 || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX))
116 return true;
117
118 return false;
119 }
120
121 /* Helper to control use of kernel-level nexthop ids */
122 static bool kernel_nexthops_supported(void)
123 {
124 return (supports_nh && !vrf_is_backend_netns()
125 && zebra_nhg_kernel_nexthops_enabled());
126 }
127
/*
 * Operators may want only the nexthop groups created by protocols to be
 * used, and not the ones zebra creates implicitly. This is a thin
 * wrapper that reports the zebra_nhg setting for that choice.
 */
static bool proto_nexthops_only(void)
{
	bool protos_only = zebra_nhg_proto_nexthops_only();

	return protos_only;
}
136
137 /* Is this a proto created NHG? */
138 static bool is_proto_nhg(uint32_t id, int type)
139 {
140 /* If type is available, use it as the source of truth */
141 if (type) {
142 if (type != ZEBRA_ROUTE_NHG)
143 return true;
144 return false;
145 }
146
147 if (id >= ZEBRA_NHG_PROTO_LOWER)
148 return true;
149
150 return false;
151 }
152
153 /*
154 * The ipv4_ll data structure is used for all 5549
155 * additions to the kernel. Let's figure out the
156 * correct value one time instead for every
157 * install/remove of a 5549 type route
158 */
159 void rt_netlink_init(void)
160 {
161 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
162 }
163
164 /*
165 * Mapping from dataplane neighbor flags to netlink flags
166 */
167 static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
168 {
169 uint8_t flags = 0;
170
171 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
172 flags |= NTF_EXT_LEARNED;
173 if (dplane_flags & DPLANE_NTF_ROUTER)
174 flags |= NTF_ROUTER;
175 if (dplane_flags & DPLANE_NTF_USE)
176 flags |= NTF_USE;
177
178 return flags;
179 }
180
181 /*
182 * Mapping from dataplane neighbor state to netlink state
183 */
184 static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
185 {
186 uint16_t state = 0;
187
188 if (dplane_state & DPLANE_NUD_REACHABLE)
189 state |= NUD_REACHABLE;
190 if (dplane_state & DPLANE_NUD_STALE)
191 state |= NUD_STALE;
192 if (dplane_state & DPLANE_NUD_NOARP)
193 state |= NUD_NOARP;
194 if (dplane_state & DPLANE_NUD_PROBE)
195 state |= NUD_PROBE;
196 if (dplane_state & DPLANE_NUD_INCOMPLETE)
197 state |= NUD_INCOMPLETE;
198
199 return state;
200 }
201
202
203 static inline bool is_selfroute(int proto)
204 {
205 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
206 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
207 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
208 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
209 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
210 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
211 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)
212 || (proto == RTPROT_SRTE)) {
213 return true;
214 }
215
216 return false;
217 }
218
219 static inline int zebra2proto(int proto)
220 {
221 switch (proto) {
222 case ZEBRA_ROUTE_BABEL:
223 proto = RTPROT_BABEL;
224 break;
225 case ZEBRA_ROUTE_BGP:
226 proto = RTPROT_BGP;
227 break;
228 case ZEBRA_ROUTE_OSPF:
229 case ZEBRA_ROUTE_OSPF6:
230 proto = RTPROT_OSPF;
231 break;
232 case ZEBRA_ROUTE_STATIC:
233 proto = RTPROT_ZSTATIC;
234 break;
235 case ZEBRA_ROUTE_ISIS:
236 proto = RTPROT_ISIS;
237 break;
238 case ZEBRA_ROUTE_RIP:
239 proto = RTPROT_RIP;
240 break;
241 case ZEBRA_ROUTE_RIPNG:
242 proto = RTPROT_RIPNG;
243 break;
244 case ZEBRA_ROUTE_NHRP:
245 proto = RTPROT_NHRP;
246 break;
247 case ZEBRA_ROUTE_EIGRP:
248 proto = RTPROT_EIGRP;
249 break;
250 case ZEBRA_ROUTE_LDP:
251 proto = RTPROT_LDP;
252 break;
253 case ZEBRA_ROUTE_SHARP:
254 proto = RTPROT_SHARP;
255 break;
256 case ZEBRA_ROUTE_PBR:
257 proto = RTPROT_PBR;
258 break;
259 case ZEBRA_ROUTE_OPENFABRIC:
260 proto = RTPROT_OPENFABRIC;
261 break;
262 case ZEBRA_ROUTE_SRTE:
263 proto = RTPROT_SRTE;
264 break;
265 case ZEBRA_ROUTE_TABLE:
266 case ZEBRA_ROUTE_NHG:
267 proto = RTPROT_ZEBRA;
268 break;
269 default:
270 /*
271 * When a user adds a new protocol this will show up
272 * to let them know to do something about it. This
273 * is intentionally a warn because we should see
274 * this as part of development of a new protocol
275 */
276 zlog_debug(
277 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
278 __func__, proto);
279 proto = RTPROT_ZEBRA;
280 break;
281 }
282
283 return proto;
284 }
285
286 static inline int proto2zebra(int proto, int family, bool is_nexthop)
287 {
288 switch (proto) {
289 case RTPROT_BABEL:
290 proto = ZEBRA_ROUTE_BABEL;
291 break;
292 case RTPROT_BGP:
293 proto = ZEBRA_ROUTE_BGP;
294 break;
295 case RTPROT_OSPF:
296 proto = (family == AFI_IP) ? ZEBRA_ROUTE_OSPF
297 : ZEBRA_ROUTE_OSPF6;
298 break;
299 case RTPROT_ISIS:
300 proto = ZEBRA_ROUTE_ISIS;
301 break;
302 case RTPROT_RIP:
303 proto = ZEBRA_ROUTE_RIP;
304 break;
305 case RTPROT_RIPNG:
306 proto = ZEBRA_ROUTE_RIPNG;
307 break;
308 case RTPROT_NHRP:
309 proto = ZEBRA_ROUTE_NHRP;
310 break;
311 case RTPROT_EIGRP:
312 proto = ZEBRA_ROUTE_EIGRP;
313 break;
314 case RTPROT_LDP:
315 proto = ZEBRA_ROUTE_LDP;
316 break;
317 case RTPROT_STATIC:
318 case RTPROT_ZSTATIC:
319 proto = ZEBRA_ROUTE_STATIC;
320 break;
321 case RTPROT_SHARP:
322 proto = ZEBRA_ROUTE_SHARP;
323 break;
324 case RTPROT_PBR:
325 proto = ZEBRA_ROUTE_PBR;
326 break;
327 case RTPROT_OPENFABRIC:
328 proto = ZEBRA_ROUTE_OPENFABRIC;
329 break;
330 case RTPROT_SRTE:
331 proto = ZEBRA_ROUTE_SRTE;
332 break;
333 case RTPROT_ZEBRA:
334 if (is_nexthop) {
335 proto = ZEBRA_ROUTE_NHG;
336 break;
337 }
338 /* Intentional fall thru */
339 default:
340 /*
341 * When a user adds a new protocol this will show up
342 * to let them know to do something about it. This
343 * is intentionally a warn because we should see
344 * this as part of development of a new protocol
345 */
346 zlog_debug(
347 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
348 __func__, proto);
349 proto = ZEBRA_ROUTE_KERNEL;
350 break;
351 }
352 return proto;
353 }
354
355 /*
356 Pending: create an efficient table_id (in a tree/hash) based lookup)
357 */
358 vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
359 {
360 struct vrf *vrf;
361 struct zebra_vrf *zvrf;
362
363 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
364 zvrf = vrf->info;
365 if (zvrf == NULL)
366 continue;
367 /* case vrf with netns : match the netnsid */
368 if (vrf_is_backend_netns()) {
369 if (ns_id == zvrf_id(zvrf))
370 return zvrf_id(zvrf);
371 } else {
372 /* VRF is VRF_BACKEND_VRF_LITE */
373 if (zvrf->table_id != table_id)
374 continue;
375 return zvrf_id(zvrf);
376 }
377 }
378
379 return VRF_DEFAULT;
380 }
381
382 /**
383 * @parse_encap_mpls() - Parses encapsulated mpls attributes
384 * @tb: Pointer to rtattr to look for nested items in.
385 * @labels: Pointer to store labels in.
386 *
387 * Return: Number of mpls labels found.
388 */
389 static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
390 {
391 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
392 mpls_lse_t *lses = NULL;
393 int num_labels = 0;
394 uint32_t ttl = 0;
395 uint32_t bos = 0;
396 uint32_t exp = 0;
397 mpls_label_t label = 0;
398
399 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
400 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
401 while (!bos && num_labels < MPLS_MAX_LABELS) {
402 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
403 labels[num_labels++] = label;
404 }
405
406 return num_labels;
407 }
408
409 static struct nexthop
410 parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
411 enum blackhole_type bh_type, int index, void *prefsrc,
412 void *gate, afi_t afi, vrf_id_t vrf_id)
413 {
414 struct interface *ifp = NULL;
415 struct nexthop nh = {0};
416 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
417 int num_labels = 0;
418
419 vrf_id_t nh_vrf_id = vrf_id;
420 size_t sz = (afi == AFI_IP) ? 4 : 16;
421
422 if (bh_type == BLACKHOLE_UNSPEC) {
423 if (index && !gate)
424 nh.type = NEXTHOP_TYPE_IFINDEX;
425 else if (index && gate)
426 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
427 : NEXTHOP_TYPE_IPV6_IFINDEX;
428 else if (!index && gate)
429 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
430 : NEXTHOP_TYPE_IPV6;
431 else {
432 nh.type = NEXTHOP_TYPE_BLACKHOLE;
433 nh.bh_type = bh_type;
434 }
435 } else {
436 nh.type = NEXTHOP_TYPE_BLACKHOLE;
437 nh.bh_type = bh_type;
438 }
439 nh.ifindex = index;
440 if (prefsrc)
441 memcpy(&nh.src, prefsrc, sz);
442 if (gate)
443 memcpy(&nh.gate, gate, sz);
444
445 if (index) {
446 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
447 if (ifp)
448 nh_vrf_id = ifp->vrf_id;
449 }
450 nh.vrf_id = nh_vrf_id;
451
452 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
453 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
454 == LWTUNNEL_ENCAP_MPLS) {
455 num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
456 }
457
458 if (rtm->rtm_flags & RTNH_F_ONLINK)
459 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
460
461 if (num_labels)
462 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
463
464 return nh;
465 }
466
467 static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
468 struct nexthop_group *ng,
469 struct rtmsg *rtm,
470 struct rtnexthop *rtnh,
471 struct rtattr **tb,
472 void *prefsrc, vrf_id_t vrf_id)
473 {
474 void *gate = NULL;
475 struct interface *ifp = NULL;
476 int index = 0;
477 /* MPLS labels */
478 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
479 int num_labels = 0;
480 struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
481
482 int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
483 vrf_id_t nh_vrf_id = vrf_id;
484
485 for (;;) {
486 struct nexthop *nh = NULL;
487
488 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
489 break;
490
491 index = rtnh->rtnh_ifindex;
492 if (index) {
493 /*
494 * Yes we are looking this up
495 * for every nexthop and just
496 * using the last one looked
497 * up right now
498 */
499 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
500 index);
501 if (ifp)
502 nh_vrf_id = ifp->vrf_id;
503 else {
504 flog_warn(
505 EC_ZEBRA_UNKNOWN_INTERFACE,
506 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
507 __func__, index);
508 nh_vrf_id = VRF_DEFAULT;
509 }
510 } else
511 nh_vrf_id = vrf_id;
512
513 if (rtnh->rtnh_len > sizeof(*rtnh)) {
514 memset(rtnh_tb, 0, sizeof(rtnh_tb));
515
516 netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
517 rtnh->rtnh_len - sizeof(*rtnh));
518 if (rtnh_tb[RTA_GATEWAY])
519 gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
520 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
521 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
522 == LWTUNNEL_ENCAP_MPLS) {
523 num_labels = parse_encap_mpls(
524 rtnh_tb[RTA_ENCAP], labels);
525 }
526 }
527
528 if (gate && rtm->rtm_family == AF_INET) {
529 if (index)
530 nh = nexthop_from_ipv4_ifindex(
531 gate, prefsrc, index, nh_vrf_id);
532 else
533 nh = nexthop_from_ipv4(gate, prefsrc,
534 nh_vrf_id);
535 } else if (gate && rtm->rtm_family == AF_INET6) {
536 if (index)
537 nh = nexthop_from_ipv6_ifindex(
538 gate, index, nh_vrf_id);
539 else
540 nh = nexthop_from_ipv6(gate, nh_vrf_id);
541 } else
542 nh = nexthop_from_ifindex(index, nh_vrf_id);
543
544 if (nh) {
545 nh->weight = rtnh->rtnh_hops + 1;
546
547 if (num_labels)
548 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
549 num_labels, labels);
550
551 if (rtnh->rtnh_flags & RTNH_F_ONLINK)
552 SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);
553
554 /* Add to temporary list */
555 nexthop_group_add_sorted(ng, nh);
556 }
557
558 if (rtnh->rtnh_len == 0)
559 break;
560
561 len -= NLMSG_ALIGN(rtnh->rtnh_len);
562 rtnh = RTNH_NEXT(rtnh);
563 }
564
565 uint8_t nhop_num = nexthop_group_nexthop_num(ng);
566
567 return nhop_num;
568 }
569
570 /* Looking up routing table by netlink interface. */
571 static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
572 int startup)
573 {
574 int len;
575 struct rtmsg *rtm;
576 struct rtattr *tb[RTA_MAX + 1];
577 uint8_t flags = 0;
578 struct prefix p;
579 struct prefix_ipv6 src_p = {};
580 vrf_id_t vrf_id;
581 bool selfroute;
582
583 char anyaddr[16] = {0};
584
585 int proto = ZEBRA_ROUTE_KERNEL;
586 int index = 0;
587 int table;
588 int metric = 0;
589 uint32_t mtu = 0;
590 uint8_t distance = 0;
591 route_tag_t tag = 0;
592 uint32_t nhe_id = 0;
593
594 void *dest = NULL;
595 void *gate = NULL;
596 void *prefsrc = NULL; /* IPv4 preferred source host address */
597 void *src = NULL; /* IPv6 srcdest source prefix */
598 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
599
600 rtm = NLMSG_DATA(h);
601
602 if (startup && h->nlmsg_type != RTM_NEWROUTE)
603 return 0;
604 switch (rtm->rtm_type) {
605 case RTN_UNICAST:
606 break;
607 case RTN_BLACKHOLE:
608 bh_type = BLACKHOLE_NULL;
609 break;
610 case RTN_UNREACHABLE:
611 bh_type = BLACKHOLE_REJECT;
612 break;
613 case RTN_PROHIBIT:
614 bh_type = BLACKHOLE_ADMINPROHIB;
615 break;
616 default:
617 if (IS_ZEBRA_DEBUG_KERNEL)
618 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
619 nl_rttype_to_str(rtm->rtm_type),
620 rtm->rtm_type);
621 return 0;
622 }
623
624 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
625 if (len < 0) {
626 zlog_err(
627 "%s: Message received from netlink is of a broken size %d %zu",
628 __func__, h->nlmsg_len,
629 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
630 return -1;
631 }
632
633 memset(tb, 0, sizeof(tb));
634 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
635
636 if (rtm->rtm_flags & RTM_F_CLONED)
637 return 0;
638 if (rtm->rtm_protocol == RTPROT_REDIRECT)
639 return 0;
640 if (rtm->rtm_protocol == RTPROT_KERNEL)
641 return 0;
642
643 selfroute = is_selfroute(rtm->rtm_protocol);
644
645 if (!startup && selfroute && h->nlmsg_type == RTM_NEWROUTE) {
646 if (IS_ZEBRA_DEBUG_KERNEL)
647 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
648 rtm->rtm_protocol);
649 return 0;
650 }
651
652 /* We don't care about change notifications for the MPLS table. */
653 /* TODO: Revisit this. */
654 if (rtm->rtm_family == AF_MPLS)
655 return 0;
656
657 /* Table corresponding to route. */
658 if (tb[RTA_TABLE])
659 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
660 else
661 table = rtm->rtm_table;
662
663 /* Map to VRF */
664 vrf_id = vrf_lookup_by_table(table, ns_id);
665 if (vrf_id == VRF_DEFAULT) {
666 if (!is_zebra_valid_kernel_table(table)
667 && !is_zebra_main_routing_table(table))
668 return 0;
669 }
670
671 if (rtm->rtm_flags & RTM_F_TRAP)
672 flags |= ZEBRA_FLAG_TRAPPED;
673 if (rtm->rtm_flags & RTM_F_OFFLOAD)
674 flags |= ZEBRA_FLAG_OFFLOADED;
675
676 /* Route which inserted by Zebra. */
677 if (selfroute) {
678 flags |= ZEBRA_FLAG_SELFROUTE;
679 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
680 }
681 if (tb[RTA_OIF])
682 index = *(int *)RTA_DATA(tb[RTA_OIF]);
683
684 if (tb[RTA_DST])
685 dest = RTA_DATA(tb[RTA_DST]);
686 else
687 dest = anyaddr;
688
689 if (tb[RTA_SRC])
690 src = RTA_DATA(tb[RTA_SRC]);
691 else
692 src = anyaddr;
693
694 if (tb[RTA_PREFSRC])
695 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
696
697 if (tb[RTA_GATEWAY])
698 gate = RTA_DATA(tb[RTA_GATEWAY]);
699
700 if (tb[RTA_NH_ID])
701 nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
702
703 if (tb[RTA_PRIORITY])
704 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
705
706 #if defined(SUPPORT_REALMS)
707 if (tb[RTA_FLOW])
708 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
709 #endif
710
711 if (tb[RTA_METRICS]) {
712 struct rtattr *mxrta[RTAX_MAX + 1];
713
714 memset(mxrta, 0, sizeof(mxrta));
715 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
716 RTA_PAYLOAD(tb[RTA_METRICS]));
717
718 if (mxrta[RTAX_MTU])
719 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
720 }
721
722 if (rtm->rtm_family == AF_INET) {
723 p.family = AF_INET;
724 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
725 zlog_err(
726 "Invalid destination prefix length: %u received from kernel route change",
727 rtm->rtm_dst_len);
728 return -1;
729 }
730 memcpy(&p.u.prefix4, dest, 4);
731 p.prefixlen = rtm->rtm_dst_len;
732
733 if (rtm->rtm_src_len != 0) {
734 char buf[PREFIX_STRLEN];
735 flog_warn(
736 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
737 "unsupported IPv4 sourcedest route (dest %s vrf %u)",
738 prefix2str(&p, buf, sizeof(buf)), vrf_id);
739 return 0;
740 }
741
742 /* Force debug below to not display anything for source */
743 src_p.prefixlen = 0;
744 } else if (rtm->rtm_family == AF_INET6) {
745 p.family = AF_INET6;
746 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
747 zlog_err(
748 "Invalid destination prefix length: %u received from kernel route change",
749 rtm->rtm_dst_len);
750 return -1;
751 }
752 memcpy(&p.u.prefix6, dest, 16);
753 p.prefixlen = rtm->rtm_dst_len;
754
755 src_p.family = AF_INET6;
756 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
757 zlog_err(
758 "Invalid source prefix length: %u received from kernel route change",
759 rtm->rtm_src_len);
760 return -1;
761 }
762 memcpy(&src_p.prefix, src, 16);
763 src_p.prefixlen = rtm->rtm_src_len;
764 }
765
766 /*
767 * For ZEBRA_ROUTE_KERNEL types:
768 *
769 * The metric/priority of the route received from the kernel
770 * is a 32 bit number. We are going to interpret the high
771 * order byte as the Admin Distance and the low order 3 bytes
772 * as the metric.
773 *
774 * This will allow us to do two things:
775 * 1) Allow the creation of kernel routes that can be
776 * overridden by zebra.
777 * 2) Allow the old behavior for 'most' kernel route types
778 * if a user enters 'ip route ...' v4 routes get a metric
779 * of 0 and v6 routes get a metric of 1024. Both of these
780 * values will end up with a admin distance of 0, which
781 * will cause them to win for the purposes of zebra.
782 */
783 if (proto == ZEBRA_ROUTE_KERNEL) {
784 distance = (metric >> 24) & 0xFF;
785 metric = (metric & 0x00FFFFFF);
786 }
787
788 if (IS_ZEBRA_DEBUG_KERNEL) {
789 char buf[PREFIX_STRLEN];
790 char buf2[PREFIX_STRLEN];
791 zlog_debug(
792 "%s %s%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d",
793 nl_msg_type_to_str(h->nlmsg_type),
794 prefix2str(&p, buf, sizeof(buf)),
795 src_p.prefixlen ? " from " : "",
796 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
797 : "",
798 vrf_id_to_name(vrf_id), vrf_id, table, metric,
799 distance);
800 }
801
802 afi_t afi = AFI_IP;
803 if (rtm->rtm_family == AF_INET6)
804 afi = AFI_IP6;
805
806 if (h->nlmsg_type == RTM_NEWROUTE) {
807
808 if (!tb[RTA_MULTIPATH]) {
809 struct nexthop nh = {0};
810
811 if (!nhe_id) {
812 nh = parse_nexthop_unicast(
813 ns_id, rtm, tb, bh_type, index, prefsrc,
814 gate, afi, vrf_id);
815 }
816 rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
817 &src_p, &nh, nhe_id, table, metric, mtu,
818 distance, tag);
819 } else {
820 /* This is a multipath route */
821 struct route_entry *re;
822 struct nexthop_group *ng = NULL;
823 struct rtnexthop *rtnh =
824 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
825
826 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
827 re->type = proto;
828 re->distance = distance;
829 re->flags = flags;
830 re->metric = metric;
831 re->mtu = mtu;
832 re->vrf_id = vrf_id;
833 re->table = table;
834 re->uptime = monotime(NULL);
835 re->tag = tag;
836 re->nhe_id = nhe_id;
837
838 if (!nhe_id) {
839 uint8_t nhop_num;
840
841 /* Use temporary list of nexthops; parse
842 * message payload's nexthops.
843 */
844 ng = nexthop_group_new();
845 nhop_num =
846 parse_multipath_nexthops_unicast(
847 ns_id, ng, rtm, rtnh, tb,
848 prefsrc, vrf_id);
849
850 zserv_nexthop_num_warn(
851 __func__, (const struct prefix *)&p,
852 nhop_num);
853
854 if (nhop_num == 0) {
855 nexthop_group_delete(&ng);
856 ng = NULL;
857 }
858 }
859
860 if (nhe_id || ng)
861 rib_add_multipath(afi, SAFI_UNICAST, &p,
862 &src_p, re, ng);
863 else
864 XFREE(MTYPE_RE, re);
865 }
866 } else {
867 if (nhe_id) {
868 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
869 &p, &src_p, NULL, nhe_id, table, metric,
870 distance, true, false);
871 } else {
872 if (!tb[RTA_MULTIPATH]) {
873 struct nexthop nh;
874
875 nh = parse_nexthop_unicast(
876 ns_id, rtm, tb, bh_type, index, prefsrc,
877 gate, afi, vrf_id);
878 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
879 flags, &p, &src_p, &nh, 0, table,
880 metric, distance, true, false);
881 } else {
882 /* XXX: need to compare the entire list of
883 * nexthops here for NLM_F_APPEND stupidity */
884 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
885 flags, &p, &src_p, NULL, 0, table,
886 metric, distance, true, false);
887 }
888 }
889 }
890
891 return 0;
892 }
893
894 static struct mcast_route_data *mroute = NULL;
895
896 static int netlink_route_change_read_multicast(struct nlmsghdr *h,
897 ns_id_t ns_id, int startup)
898 {
899 int len;
900 struct rtmsg *rtm;
901 struct rtattr *tb[RTA_MAX + 1];
902 struct mcast_route_data *m;
903 struct mcast_route_data mr;
904 int iif = 0;
905 int count;
906 int oif[256];
907 int oif_count = 0;
908 char sbuf[40];
909 char gbuf[40];
910 char oif_list[256] = "\0";
911 vrf_id_t vrf;
912 int table;
913
914 if (mroute)
915 m = mroute;
916 else {
917 memset(&mr, 0, sizeof(mr));
918 m = &mr;
919 }
920
921 rtm = NLMSG_DATA(h);
922
923 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
924
925 memset(tb, 0, sizeof(tb));
926 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
927
928 if (tb[RTA_TABLE])
929 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
930 else
931 table = rtm->rtm_table;
932
933 vrf = vrf_lookup_by_table(table, ns_id);
934
935 if (tb[RTA_IIF])
936 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
937
938 if (tb[RTA_SRC])
939 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
940
941 if (tb[RTA_DST])
942 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
943
944 if (tb[RTA_EXPIRES])
945 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
946
947 if (tb[RTA_MULTIPATH]) {
948 struct rtnexthop *rtnh =
949 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
950
951 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
952 for (;;) {
953 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
954 break;
955
956 oif[oif_count] = rtnh->rtnh_ifindex;
957 oif_count++;
958
959 if (rtnh->rtnh_len == 0)
960 break;
961
962 len -= NLMSG_ALIGN(rtnh->rtnh_len);
963 rtnh = RTNH_NEXT(rtnh);
964 }
965 }
966
967 if (IS_ZEBRA_DEBUG_KERNEL) {
968 struct interface *ifp = NULL;
969 struct zebra_vrf *zvrf = NULL;
970
971 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
972 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
973 for (count = 0; count < oif_count; count++) {
974 ifp = if_lookup_by_index(oif[count], vrf);
975 char temp[256];
976
977 snprintf(temp, sizeof(temp), "%s(%d) ",
978 ifp ? ifp->name : "Unknown", oif[count]);
979 strlcat(oif_list, temp, sizeof(oif_list));
980 }
981 zvrf = zebra_vrf_lookup_by_id(vrf);
982 ifp = if_lookup_by_index(iif, vrf);
983 zlog_debug(
984 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s(%d) OIF: %s jiffies: %lld",
985 zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type),
986 sbuf, gbuf, ifp ? ifp->name : "Unknown", iif, oif_list,
987 m->lastused);
988 }
989 return 0;
990 }
991
992 int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
993 {
994 int len;
995 struct rtmsg *rtm;
996
997 rtm = NLMSG_DATA(h);
998
999 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
1000 /* If this is not route add/delete message print warning. */
1001 zlog_debug("Kernel message: %s NS %u",
1002 nl_msg_type_to_str(h->nlmsg_type), ns_id);
1003 return 0;
1004 }
1005
1006 if (!(rtm->rtm_family == AF_INET ||
1007 rtm->rtm_family == AF_INET6 ||
1008 rtm->rtm_family == RTNL_FAMILY_IPMR )) {
1009 flog_warn(
1010 EC_ZEBRA_UNKNOWN_FAMILY,
1011 "Invalid address family: %u received from kernel route change: %s",
1012 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
1013 return 0;
1014 }
1015
1016 /* Connected route. */
1017 if (IS_ZEBRA_DEBUG_KERNEL)
1018 zlog_debug("%s %s %s proto %s NS %u",
1019 nl_msg_type_to_str(h->nlmsg_type),
1020 nl_family_to_str(rtm->rtm_family),
1021 nl_rttype_to_str(rtm->rtm_type),
1022 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
1023
1024
1025 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
1026 if (len < 0) {
1027 zlog_err(
1028 "%s: Message received from netlink is of a broken size: %d %zu",
1029 __func__, h->nlmsg_len,
1030 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
1031 return -1;
1032 }
1033
1034 if (rtm->rtm_type == RTN_MULTICAST)
1035 netlink_route_change_read_multicast(h, ns_id, startup);
1036 else
1037 netlink_route_change_read_unicast(h, ns_id, startup);
1038 return 0;
1039 }
1040
1041 /* Request for specific route information from the kernel */
1042 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
1043 {
1044 struct {
1045 struct nlmsghdr n;
1046 struct rtmsg rtm;
1047 } req;
1048
1049 /* Form the request, specifying filter (rtattr) if needed. */
1050 memset(&req, 0, sizeof(req));
1051 req.n.nlmsg_type = type;
1052 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
1053 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1054 req.rtm.rtm_family = family;
1055
1056 return netlink_request(&zns->netlink_cmd, &req);
1057 }
1058
1059 /* Routing table read function using netlink interface. Only called
1060 bootstrap time. */
1061 int netlink_route_read(struct zebra_ns *zns)
1062 {
1063 int ret;
1064 struct zebra_dplane_info dp_info;
1065
1066 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
1067
1068 /* Get IPv4 routing table. */
1069 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
1070 if (ret < 0)
1071 return ret;
1072 ret = netlink_parse_info(netlink_route_change_read_unicast,
1073 &zns->netlink_cmd, &dp_info, 0, 1);
1074 if (ret < 0)
1075 return ret;
1076
1077 /* Get IPv6 routing table. */
1078 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
1079 if (ret < 0)
1080 return ret;
1081 ret = netlink_parse_info(netlink_route_change_read_unicast,
1082 &zns->netlink_cmd, &dp_info, 0, 1);
1083 if (ret < 0)
1084 return ret;
1085
1086 return 0;
1087 }
1088
1089 /*
1090 * The function returns true if the gateway info could be added
1091 * to the message, otherwise false is returned.
1092 */
1093 static bool _netlink_route_add_gateway_info(uint8_t route_family,
1094 uint8_t gw_family,
1095 struct nlmsghdr *nlmsg,
1096 size_t req_size, int bytelen,
1097 const struct nexthop *nexthop)
1098 {
1099 if (route_family == AF_MPLS) {
1100 struct gw_family_t gw_fam;
1101
1102 gw_fam.family = gw_family;
1103 if (gw_family == AF_INET)
1104 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1105 else
1106 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1107 if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family,
1108 bytelen + 2))
1109 return false;
1110 } else {
1111 if (!(nexthop->rparent
1112 && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) {
1113 if (gw_family == AF_INET) {
1114 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1115 &nexthop->gate.ipv4, bytelen))
1116 return false;
1117 } else {
1118 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1119 &nexthop->gate.ipv6, bytelen))
1120 return false;
1121 }
1122 }
1123 }
1124
1125 return true;
1126 }
1127
1128 static int build_label_stack(struct mpls_label_stack *nh_label,
1129 mpls_lse_t *out_lse, char *label_buf,
1130 size_t label_buf_size)
1131 {
1132 char label_buf1[20];
1133 int num_labels = 0;
1134
1135 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1136 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1137 continue;
1138
1139 if (IS_ZEBRA_DEBUG_KERNEL) {
1140 if (!num_labels)
1141 sprintf(label_buf, "label %u",
1142 nh_label->label[i]);
1143 else {
1144 snprintf(label_buf1, sizeof(label_buf1), "/%u",
1145 nh_label->label[i]);
1146 strlcat(label_buf, label_buf1, label_buf_size);
1147 }
1148 }
1149
1150 out_lse[num_labels] =
1151 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1152 num_labels++;
1153 }
1154
1155 return num_labels;
1156 }
1157
1158 static bool _netlink_route_encode_label_info(struct mpls_label_stack *nh_label,
1159 struct nlmsghdr *nlmsg,
1160 size_t buflen, struct rtmsg *rtmsg,
1161 char *label_buf,
1162 size_t label_buf_size)
1163 {
1164 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1165 int num_labels;
1166
1167 /*
1168 * label_buf is *only* currently used within debugging.
1169 * As such when we assign it we are guarding it inside
1170 * a debug test. If you want to change this make sure
1171 * you fix this assumption
1172 */
1173 label_buf[0] = '\0';
1174
1175 num_labels =
1176 build_label_stack(nh_label, out_lse, label_buf, label_buf_size);
1177
1178 if (num_labels) {
1179 /* Set the BoS bit */
1180 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1181
1182 if (rtmsg->rtm_family == AF_MPLS) {
1183 if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse,
1184 num_labels * sizeof(mpls_lse_t)))
1185 return false;
1186 } else {
1187 struct rtattr *nest;
1188
1189 if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
1190 LWTUNNEL_ENCAP_MPLS))
1191 return false;
1192
1193 nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
1194 if (!nest)
1195 return false;
1196
1197 if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST,
1198 &out_lse,
1199 num_labels * sizeof(mpls_lse_t)))
1200 return false;
1201 nl_attr_nest_end(nlmsg, nest);
1202 }
1203 }
1204
1205 return true;
1206 }
1207
1208 static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop,
1209 int family,
1210 struct nlmsghdr *nlmsg,
1211 size_t buflen, int bytelen)
1212 {
1213 if (family == AF_INET) {
1214 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1215 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1216 &nexthop->rmap_src.ipv4, bytelen))
1217 return false;
1218 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1219 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1220 &nexthop->src.ipv4, bytelen))
1221 return false;
1222 }
1223 } else if (family == AF_INET6) {
1224 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1225 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1226 &nexthop->rmap_src.ipv6, bytelen))
1227 return false;
1228 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1229 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1230 &nexthop->src.ipv6, bytelen))
1231 return false;
1232 }
1233 }
1234
1235 return true;
1236 }
1237
1238 /* This function takes a nexthop as argument and adds
1239 * the appropriate netlink attributes to an existing
1240 * netlink message.
1241 *
1242 * @param routedesc: Human readable description of route type
1243 * (direct/recursive, single-/multipath)
1244 * @param bytelen: Length of addresses in bytes.
1245 * @param nexthop: Nexthop information
1246 * @param nlmsg: nlmsghdr structure to fill in.
1247 * @param req_size: The size allocated for the message.
1248 *
1249 * The function returns true if the nexthop could be added
1250 * to the message, otherwise false is returned.
1251 */
1252 static bool _netlink_route_build_singlepath(const struct prefix *p,
1253 const char *routedesc, int bytelen,
1254 const struct nexthop *nexthop,
1255 struct nlmsghdr *nlmsg,
1256 struct rtmsg *rtmsg,
1257 size_t req_size, int cmd)
1258 {
1259
1260 char label_buf[256];
1261 struct vrf *vrf;
1262 char addrstr[INET6_ADDRSTRLEN];
1263
1264 assert(nexthop);
1265
1266 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1267
1268 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1269 req_size, rtmsg, label_buf,
1270 sizeof(label_buf)))
1271 return false;
1272
1273 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1274 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1275
1276 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1277 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1278 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1279 return false;
1280 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1281 return false;
1282
1283 if (cmd == RTM_NEWROUTE) {
1284 if (!_netlink_route_encode_nexthop_src(
1285 nexthop, AF_INET, nlmsg, req_size, bytelen))
1286 return false;
1287 }
1288
1289 if (IS_ZEBRA_DEBUG_KERNEL)
1290 zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1291 __func__, routedesc, p, ipv4_ll_buf,
1292 label_buf, nexthop->ifindex,
1293 VRF_LOGNAME(vrf), nexthop->vrf_id);
1294 return true;
1295 }
1296
1297 if (nexthop->type == NEXTHOP_TYPE_IPV4
1298 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1299 /* Send deletes to the kernel without specifying the next-hop */
1300 if (cmd != RTM_DELROUTE) {
1301 if (!_netlink_route_add_gateway_info(
1302 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1303 bytelen, nexthop))
1304 return false;
1305 }
1306
1307 if (cmd == RTM_NEWROUTE) {
1308 if (!_netlink_route_encode_nexthop_src(
1309 nexthop, AF_INET, nlmsg, req_size, bytelen))
1310 return false;
1311 }
1312
1313 if (IS_ZEBRA_DEBUG_KERNEL) {
1314 inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr,
1315 sizeof(addrstr));
1316 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1317 __func__, routedesc, p, addrstr, label_buf,
1318 nexthop->ifindex, VRF_LOGNAME(vrf),
1319 nexthop->vrf_id);
1320 }
1321 }
1322
1323 if (nexthop->type == NEXTHOP_TYPE_IPV6
1324 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1325 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1326 AF_INET6, nlmsg, req_size,
1327 bytelen, nexthop))
1328 return false;
1329
1330 if (cmd == RTM_NEWROUTE) {
1331 if (!_netlink_route_encode_nexthop_src(
1332 nexthop, AF_INET6, nlmsg, req_size,
1333 bytelen))
1334 return false;
1335 }
1336
1337 if (IS_ZEBRA_DEBUG_KERNEL) {
1338 inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr,
1339 sizeof(addrstr));
1340 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1341 __func__, routedesc, p, addrstr, label_buf,
1342 nexthop->ifindex, VRF_LOGNAME(vrf),
1343 nexthop->vrf_id);
1344 }
1345 }
1346
1347 /*
1348 * We have the ifindex so we should always send it
1349 * This is especially useful if we are doing route
1350 * leaking.
1351 */
1352 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
1353 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1354 return false;
1355 }
1356
1357 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1358 if (cmd == RTM_NEWROUTE) {
1359 if (!_netlink_route_encode_nexthop_src(
1360 nexthop, AF_INET, nlmsg, req_size, bytelen))
1361 return false;
1362 }
1363
1364 if (IS_ZEBRA_DEBUG_KERNEL)
1365 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1366 __func__, routedesc, p, nexthop->ifindex,
1367 VRF_LOGNAME(vrf), nexthop->vrf_id);
1368 }
1369
1370 return true;
1371 }
1372
1373 /* This function takes a nexthop as argument and
1374 * appends to the given netlink msg. If the nexthop
1375 * defines a preferred source, the src parameter
1376 * will be modified to point to that src, otherwise
1377 * it will be kept unmodified.
1378 *
1379 * @param routedesc: Human readable description of route type
1380 * (direct/recursive, single-/multipath)
1381 * @param bytelen: Length of addresses in bytes.
1382 * @param nexthop: Nexthop information
1383 * @param nlmsg: nlmsghdr structure to fill in.
1384 * @param req_size: The size allocated for the message.
1385 * @param src: pointer pointing to a location where
1386 * the prefsrc should be stored.
1387 *
1388 * The function returns true if the nexthop could be added
1389 * to the message, otherwise false is returned.
1390 */
1391 static bool _netlink_route_build_multipath(const struct prefix *p,
1392 const char *routedesc, int bytelen,
1393 const struct nexthop *nexthop,
1394 struct nlmsghdr *nlmsg,
1395 size_t req_size, struct rtmsg *rtmsg,
1396 const union g_addr **src)
1397 {
1398 char label_buf[256];
1399 struct vrf *vrf;
1400 struct rtnexthop *rtnh;
1401
1402 rtnh = nl_attr_rtnh(nlmsg, req_size);
1403 if (rtnh == NULL)
1404 return false;
1405
1406 assert(nexthop);
1407
1408 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1409
1410 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1411 req_size, rtmsg, label_buf,
1412 sizeof(label_buf)))
1413 return false;
1414
1415 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1416 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1417
1418 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1419 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1420 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1421 return false;
1422 rtnh->rtnh_ifindex = nexthop->ifindex;
1423 if (nexthop->weight)
1424 rtnh->rtnh_hops = nexthop->weight - 1;
1425
1426 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1427 *src = &nexthop->rmap_src;
1428 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1429 *src = &nexthop->src;
1430
1431 if (IS_ZEBRA_DEBUG_KERNEL)
1432 zlog_debug(
1433 "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1434 __func__, routedesc, p, ipv4_ll_buf, label_buf,
1435 nexthop->ifindex, VRF_LOGNAME(vrf),
1436 nexthop->vrf_id);
1437 nl_attr_rtnh_end(nlmsg, rtnh);
1438 return true;
1439 }
1440
1441 if (nexthop->type == NEXTHOP_TYPE_IPV4
1442 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1443 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET,
1444 nlmsg, req_size, bytelen,
1445 nexthop))
1446 return false;
1447
1448 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1449 *src = &nexthop->rmap_src;
1450 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1451 *src = &nexthop->src;
1452
1453 if (IS_ZEBRA_DEBUG_KERNEL)
1454 zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)",
1455 __func__, routedesc, p, &nexthop->gate.ipv4,
1456 label_buf, nexthop->ifindex,
1457 VRF_LOGNAME(vrf), nexthop->vrf_id);
1458 }
1459 if (nexthop->type == NEXTHOP_TYPE_IPV6
1460 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1461 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1462 AF_INET6, nlmsg, req_size,
1463 bytelen, nexthop))
1464 return false;
1465
1466 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1467 *src = &nexthop->rmap_src;
1468 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1469 *src = &nexthop->src;
1470
1471 if (IS_ZEBRA_DEBUG_KERNEL)
1472 zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)",
1473 __func__, routedesc, p, &nexthop->gate.ipv6,
1474 label_buf, nexthop->ifindex,
1475 VRF_LOGNAME(vrf), nexthop->vrf_id);
1476 }
1477
1478 /*
1479 * We have figured out the ifindex so we should always send it
1480 * This is especially useful if we are doing route
1481 * leaking.
1482 */
1483 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1484 rtnh->rtnh_ifindex = nexthop->ifindex;
1485
1486 /* ifindex */
1487 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1488 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1489 *src = &nexthop->rmap_src;
1490 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1491 *src = &nexthop->src;
1492
1493 if (IS_ZEBRA_DEBUG_KERNEL)
1494 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1495 __func__, routedesc, p, nexthop->ifindex,
1496 VRF_LOGNAME(vrf), nexthop->vrf_id);
1497 }
1498
1499 if (nexthop->weight)
1500 rtnh->rtnh_hops = nexthop->weight - 1;
1501
1502 nl_attr_rtnh_end(nlmsg, rtnh);
1503 return true;
1504 }
1505
1506 static inline bool _netlink_mpls_build_singlepath(const struct prefix *p,
1507 const char *routedesc,
1508 const zebra_nhlfe_t *nhlfe,
1509 struct nlmsghdr *nlmsg,
1510 struct rtmsg *rtmsg,
1511 size_t req_size, int cmd)
1512 {
1513 int bytelen;
1514 uint8_t family;
1515
1516 family = NHLFE_FAMILY(nhlfe);
1517 bytelen = (family == AF_INET ? 4 : 16);
1518 return _netlink_route_build_singlepath(p, routedesc, bytelen,
1519 nhlfe->nexthop, nlmsg, rtmsg,
1520 req_size, cmd);
1521 }
1522
1523
1524 static inline bool
1525 _netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc,
1526 const zebra_nhlfe_t *nhlfe,
1527 struct nlmsghdr *nlmsg, size_t req_size,
1528 struct rtmsg *rtmsg, const union g_addr **src)
1529 {
1530 int bytelen;
1531 uint8_t family;
1532
1533 family = NHLFE_FAMILY(nhlfe);
1534 bytelen = (family == AF_INET ? 4 : 16);
1535 return _netlink_route_build_multipath(p, routedesc, bytelen,
1536 nhlfe->nexthop, nlmsg, req_size,
1537 rtmsg, src);
1538 }
1539
1540 static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
1541 {
1542 if (IS_ZEBRA_DEBUG_KERNEL)
1543 zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20",
1544 routedesc, nl_msg_type_to_str(cmd), label);
1545 }
1546
1547 static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
1548 int llalen, ns_id_t ns_id)
1549 {
1550 uint8_t protocol = RTPROT_ZEBRA;
1551 struct {
1552 struct nlmsghdr n;
1553 struct ndmsg ndm;
1554 char buf[256];
1555 } req;
1556
1557 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
1558
1559 memset(&req, 0, sizeof(req));
1560
1561 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1562 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1563 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1564 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1565
1566 req.ndm.ndm_family = AF_INET;
1567 req.ndm.ndm_state = NUD_PERMANENT;
1568 req.ndm.ndm_ifindex = ifindex;
1569 req.ndm.ndm_type = RTN_UNICAST;
1570
1571 nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol,
1572 sizeof(protocol));
1573 nl_attr_put32(&req.n, sizeof(req), NDA_DST, addr);
1574 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1575
1576 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1577 0);
1578 }
1579
1580 static bool nexthop_set_src(const struct nexthop *nexthop, int family,
1581 union g_addr *src)
1582 {
1583 if (family == AF_INET) {
1584 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1585 src->ipv4 = nexthop->rmap_src.ipv4;
1586 return true;
1587 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1588 src->ipv4 = nexthop->src.ipv4;
1589 return true;
1590 }
1591 } else if (family == AF_INET6) {
1592 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1593 src->ipv6 = nexthop->rmap_src.ipv6;
1594 return true;
1595 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1596 src->ipv6 = nexthop->src.ipv6;
1597 return true;
1598 }
1599 }
1600
1601 return false;
1602 }
1603
1604 /*
1605 * The function returns true if the attribute could be added
1606 * to the message, otherwise false is returned.
1607 */
1608 static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen,
1609 struct nexthop *nh)
1610 {
1611 struct rtattr *nest;
1612
1613 switch (nh->nh_encap_type) {
1614 case NET_VXLAN:
1615 if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type))
1616 return false;
1617
1618 nest = nl_attr_nest(n, nlen, RTA_ENCAP);
1619 if (!nest)
1620 return false;
1621
1622 if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */,
1623 nh->nh_encap.vni))
1624 return false;
1625 nl_attr_nest_end(n, nest);
1626 break;
1627 }
1628
1629 return true;
1630 }
1631
1632 /*
1633 * Routing table change via netlink interface, using a dataplane context object
1634 *
1635 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
1636 * otherwise the number of bytes written to buf.
1637 */
1638 ssize_t netlink_route_multipath_msg_encode(int cmd,
1639 struct zebra_dplane_ctx *ctx,
1640 uint8_t *data, size_t datalen,
1641 bool fpm, bool force_nhg)
1642 {
1643 int bytelen;
1644 struct nexthop *nexthop = NULL;
1645 unsigned int nexthop_num;
1646 const char *routedesc;
1647 bool setsrc = false;
1648 union g_addr src;
1649 const struct prefix *p, *src_p;
1650 uint32_t table_id;
1651
1652 struct {
1653 struct nlmsghdr n;
1654 struct rtmsg r;
1655 char buf[];
1656 } *req = (void *)data;
1657
1658 p = dplane_ctx_get_dest(ctx);
1659 src_p = dplane_ctx_get_src(ctx);
1660
1661 if (datalen < sizeof(*req))
1662 return 0;
1663
1664 memset(req, 0, sizeof(*req));
1665
1666 bytelen = (p->family == AF_INET ? 4 : 16);
1667
1668 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1669 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1670
1671 if ((cmd == RTM_NEWROUTE) &&
1672 ((p->family == AF_INET) || v6_rr_semantics))
1673 req->n.nlmsg_flags |= NLM_F_REPLACE;
1674
1675 req->n.nlmsg_type = cmd;
1676
1677 req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
1678
1679 req->r.rtm_family = p->family;
1680 req->r.rtm_dst_len = p->prefixlen;
1681 req->r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1682 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
1683
1684 if (cmd == RTM_DELROUTE)
1685 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
1686 else
1687 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
1688
1689 /*
1690 * blackhole routes are not RTN_UNICAST, they are
1691 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
1692 * so setting this value as a RTN_UNICAST would
1693 * cause the route lookup of just the prefix
1694 * to fail. So no need to specify this for
1695 * the RTM_DELROUTE case
1696 */
1697 if (cmd != RTM_DELROUTE)
1698 req->r.rtm_type = RTN_UNICAST;
1699
1700 if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen))
1701 return 0;
1702 if (src_p) {
1703 if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix,
1704 bytelen))
1705 return 0;
1706 }
1707
1708 /* Metric. */
1709 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1710 * used
1711 * either by the kernel or by zebra. Its purely for calculating best
1712 * path(s)
1713 * by the routing protocol and for communicating with protocol peers.
1714 */
1715 if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY,
1716 NL_DEFAULT_ROUTE_METRIC))
1717 return 0;
1718
1719 #if defined(SUPPORT_REALMS)
1720 {
1721 route_tag_t tag;
1722
1723 if (cmd == RTM_DELROUTE)
1724 tag = dplane_ctx_get_old_tag(ctx);
1725 else
1726 tag = dplane_ctx_get_tag(ctx);
1727
1728 if (tag > 0 && tag <= 255) {
1729 if (!nl_attr_put32(&req->n, datalen, RTA_FLOW, tag))
1730 return 0;
1731 }
1732 }
1733 #endif
1734 /* Table corresponding to this route. */
1735 table_id = dplane_ctx_get_table(ctx);
1736 if (table_id < 256)
1737 req->r.rtm_table = table_id;
1738 else {
1739 req->r.rtm_table = RT_TABLE_UNSPEC;
1740 if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id))
1741 return 0;
1742 }
1743
1744 if (IS_ZEBRA_DEBUG_KERNEL)
1745 zlog_debug(
1746 "%s: %s %pFX vrf %u(%u)", __func__,
1747 nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx),
1748 table_id);
1749
1750 /*
1751 * If we are not updating the route and we have received
1752 * a route delete, then all we need to fill in is the
1753 * prefix information to tell the kernel to schwack
1754 * it.
1755 */
1756 if (cmd == RTM_DELROUTE)
1757 return NLMSG_ALIGN(req->n.nlmsg_len);
1758
1759 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
1760 struct rtattr *nest;
1761 uint32_t mtu = dplane_ctx_get_mtu(ctx);
1762 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
1763
1764 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
1765 mtu = nexthop_mtu;
1766
1767 nest = nl_attr_nest(&req->n, datalen, RTA_METRICS);
1768 if (nest == NULL)
1769 return 0;
1770
1771 if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu)))
1772 return 0;
1773 nl_attr_nest_end(&req->n, nest);
1774 }
1775
1776 if ((!fpm && kernel_nexthops_supported()
1777 && (!proto_nexthops_only()
1778 || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0)))
1779 || (fpm && force_nhg)) {
1780 /* Kernel supports nexthop objects */
1781 if (IS_ZEBRA_DEBUG_KERNEL)
1782 zlog_debug("%s: %pFX nhg_id is %u", __func__, p,
1783 dplane_ctx_get_nhe_id(ctx));
1784
1785 if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID,
1786 dplane_ctx_get_nhe_id(ctx)))
1787 return 0;
1788
1789 /* Have to determine src still */
1790 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1791 if (setsrc)
1792 break;
1793
1794 setsrc = nexthop_set_src(nexthop, p->family, &src);
1795 }
1796
1797 if (setsrc) {
1798 if (p->family == AF_INET) {
1799 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1800 &src.ipv4, bytelen))
1801 return 0;
1802 } else if (p->family == AF_INET6) {
1803 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1804 &src.ipv6, bytelen))
1805 return 0;
1806 }
1807 }
1808
1809 return NLMSG_ALIGN(req->n.nlmsg_len);
1810 }
1811
1812 /* Count overall nexthops so we can decide whether to use singlepath
1813 * or multipath case.
1814 */
1815 nexthop_num = 0;
1816 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1817 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1818 continue;
1819 if (!NEXTHOP_IS_ACTIVE(nexthop->flags))
1820 continue;
1821
1822 nexthop_num++;
1823 }
1824
1825 /* Singlepath case. */
1826 if (nexthop_num == 1) {
1827 nexthop_num = 0;
1828 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1829 /*
1830 * So we want to cover 2 types of blackhole
1831 * routes here:
1832 * 1) A normal blackhole route( ala from a static
1833 * install.
1834 * 2) A recursively resolved blackhole route
1835 */
1836 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1837 switch (nexthop->bh_type) {
1838 case BLACKHOLE_ADMINPROHIB:
1839 req->r.rtm_type = RTN_PROHIBIT;
1840 break;
1841 case BLACKHOLE_REJECT:
1842 req->r.rtm_type = RTN_UNREACHABLE;
1843 break;
1844 default:
1845 req->r.rtm_type = RTN_BLACKHOLE;
1846 break;
1847 }
1848 return NLMSG_ALIGN(req->n.nlmsg_len);
1849 }
1850 if (CHECK_FLAG(nexthop->flags,
1851 NEXTHOP_FLAG_RECURSIVE)) {
1852
1853 if (setsrc)
1854 continue;
1855
1856 setsrc = nexthop_set_src(nexthop, p->family,
1857 &src);
1858 continue;
1859 }
1860
1861 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
1862 routedesc = nexthop->rparent
1863 ? "recursive, single-path"
1864 : "single-path";
1865
1866 if (!_netlink_route_build_singlepath(
1867 p, routedesc, bytelen, nexthop,
1868 &req->n, &req->r, datalen, cmd))
1869 return 0;
1870 nexthop_num++;
1871 break;
1872 }
1873
1874 /*
1875 * Add encapsulation information when installing via
1876 * FPM.
1877 */
1878 if (fpm) {
1879 if (!netlink_route_nexthop_encap(
1880 &req->n, datalen, nexthop))
1881 return 0;
1882 }
1883 }
1884
1885 if (setsrc) {
1886 if (p->family == AF_INET) {
1887 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1888 &src.ipv4, bytelen))
1889 return 0;
1890 } else if (p->family == AF_INET6) {
1891 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1892 &src.ipv6, bytelen))
1893 return 0;
1894 }
1895 }
1896 } else { /* Multipath case */
1897 struct rtattr *nest;
1898 const union g_addr *src1 = NULL;
1899
1900 nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH);
1901 if (nest == NULL)
1902 return 0;
1903
1904 nexthop_num = 0;
1905 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1906 if (CHECK_FLAG(nexthop->flags,
1907 NEXTHOP_FLAG_RECURSIVE)) {
1908 /* This only works for IPv4 now */
1909 if (setsrc)
1910 continue;
1911
1912 setsrc = nexthop_set_src(nexthop, p->family,
1913 &src);
1914 continue;
1915 }
1916
1917 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
1918 routedesc = nexthop->rparent
1919 ? "recursive, multipath"
1920 : "multipath";
1921 nexthop_num++;
1922
1923 if (!_netlink_route_build_multipath(
1924 p, routedesc, bytelen, nexthop,
1925 &req->n, datalen, &req->r, &src1))
1926 return 0;
1927
1928 if (!setsrc && src1) {
1929 if (p->family == AF_INET)
1930 src.ipv4 = src1->ipv4;
1931 else if (p->family == AF_INET6)
1932 src.ipv6 = src1->ipv6;
1933
1934 setsrc = 1;
1935 }
1936 }
1937 }
1938
1939 nl_attr_nest_end(&req->n, nest);
1940
1941 /*
1942 * Add encapsulation information when installing via
1943 * FPM.
1944 */
1945 if (fpm) {
1946 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
1947 nexthop)) {
1948 if (CHECK_FLAG(nexthop->flags,
1949 NEXTHOP_FLAG_RECURSIVE))
1950 continue;
1951 if (!netlink_route_nexthop_encap(
1952 &req->n, datalen, nexthop))
1953 return 0;
1954 }
1955 }
1956
1957
1958 if (setsrc) {
1959 if (p->family == AF_INET) {
1960 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1961 &src.ipv4, bytelen))
1962 return 0;
1963 } else if (p->family == AF_INET6) {
1964 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1965 &src.ipv6, bytelen))
1966 return 0;
1967 }
1968 if (IS_ZEBRA_DEBUG_KERNEL)
1969 zlog_debug("Setting source");
1970 }
1971 }
1972
1973 /* If there is no useful nexthop then return. */
1974 if (nexthop_num == 0) {
1975 if (IS_ZEBRA_DEBUG_KERNEL)
1976 zlog_debug("%s: No useful nexthop.", __func__);
1977 }
1978
1979 return NLMSG_ALIGN(req->n.nlmsg_len);
1980 }
1981
1982 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
1983 {
1984 uint32_t actual_table;
1985 int suc = 0;
1986 struct mcast_route_data *mr = (struct mcast_route_data *)in;
1987 struct {
1988 struct nlmsghdr n;
1989 struct ndmsg ndm;
1990 char buf[256];
1991 } req;
1992
1993 mroute = mr;
1994 struct zebra_ns *zns;
1995
1996 zns = zvrf->zns;
1997 memset(&req, 0, sizeof(req));
1998
1999 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2000 req.n.nlmsg_flags = NLM_F_REQUEST;
2001 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2002
2003 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
2004 req.n.nlmsg_type = RTM_GETROUTE;
2005
2006 nl_attr_put32(&req.n, sizeof(req), RTA_IIF, mroute->ifindex);
2007 nl_attr_put32(&req.n, sizeof(req), RTA_OIF, mroute->ifindex);
2008 nl_attr_put32(&req.n, sizeof(req), RTA_SRC, mroute->sg.src.s_addr);
2009 nl_attr_put32(&req.n, sizeof(req), RTA_DST, mroute->sg.grp.s_addr);
2010 /*
2011 * What?
2012 *
2013 * So during the namespace cleanup we started storing
2014 * the zvrf table_id for the default table as RT_TABLE_MAIN
2015 * which is what the normal routing table for ip routing is.
2016 * This change caused this to break our lookups of sg data
2017 * because prior to this change the zvrf->table_id was 0
2018 * and when the pim multicast kernel code saw a 0,
2019 * it was auto-translated to RT_TABLE_DEFAULT. But since
2020 * we are now passing in RT_TABLE_MAIN there is no auto-translation
2021 * and the kernel goes screw you and the delicious cookies you
2022 * are trying to give me. So now we have this little hack.
2023 */
2024 actual_table = (zvrf->table_id == RT_TABLE_MAIN) ? RT_TABLE_DEFAULT :
2025 zvrf->table_id;
2026 nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table);
2027
2028 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
2029 &zns->netlink_cmd, zns, 0);
2030
2031 mroute = NULL;
2032 return suc;
2033 }
2034
2035 /* Char length to debug ID with */
2036 #define ID_LENGTH 10
2037
2038 static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
2039 uint32_t id,
2040 const struct nh_grp *z_grp,
2041 const uint8_t count)
2042 {
2043 struct nexthop_grp grp[count];
2044 /* Need space for max group size, "/", and null term */
2045 char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
2046 char buf1[ID_LENGTH + 2];
2047
2048 buf[0] = '\0';
2049
2050 memset(grp, 0, sizeof(grp));
2051
2052 if (count) {
2053 for (int i = 0; i < count; i++) {
2054 grp[i].id = z_grp[i].id;
2055 grp[i].weight = z_grp[i].weight - 1;
2056
2057 if (IS_ZEBRA_DEBUG_KERNEL) {
2058 if (i == 0)
2059 snprintf(buf, sizeof(buf1), "group %u",
2060 grp[i].id);
2061 else {
2062 snprintf(buf1, sizeof(buf1), "/%u",
2063 grp[i].id);
2064 strlcat(buf, buf1, sizeof(buf));
2065 }
2066 }
2067 }
2068 if (!nl_attr_put(n, req_size, NHA_GROUP, grp,
2069 count * sizeof(*grp)))
2070 return false;
2071 }
2072
2073 if (IS_ZEBRA_DEBUG_KERNEL)
2074 zlog_debug("%s: ID (%u): %s", __func__, id, buf);
2075
2076 return true;
2077 }
2078
2079 /**
2080 * Next hop packet encoding helper function.
2081 *
2082 * \param[in] cmd netlink command.
2083 * \param[in] ctx dataplane context (information snapshot).
2084 * \param[out] buf buffer to hold the packet.
2085 * \param[in] buflen amount of buffer bytes.
2086 *
2087 * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
2088 * otherwise the number of bytes written to buf.
2089 */
2090 ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
2091 const struct zebra_dplane_ctx *ctx,
2092 void *buf, size_t buflen)
2093 {
2094 struct {
2095 struct nlmsghdr n;
2096 struct nhmsg nhm;
2097 char buf[];
2098 } *req = buf;
2099
2100 mpls_lse_t out_lse[MPLS_MAX_LABELS];
2101 char label_buf[256];
2102 int num_labels = 0;
2103 uint32_t id = dplane_ctx_get_nhe_id(ctx);
2104 int type = dplane_ctx_get_nhe_type(ctx);
2105
2106 if (!id) {
2107 flog_err(
2108 EC_ZEBRA_NHG_FIB_UPDATE,
2109 "Failed trying to update a nexthop group in the kernel that does not have an ID");
2110 return -1;
2111 }
2112
2113 /*
2114 * Nothing to do if the kernel doesn't support nexthop objects or
2115 * we dont want to install this type of NHG
2116 */
2117 if (!kernel_nexthops_supported()) {
2118 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2119 zlog_debug(
2120 "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring",
2121 __func__, id, zebra_route_string(type));
2122 return 0;
2123 }
2124
2125 if (proto_nexthops_only() && !is_proto_nhg(id, type)) {
2126 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2127 zlog_debug(
2128 "%s: nhg_id %u (%s): proto-based nexthops only, ignoring",
2129 __func__, id, zebra_route_string(type));
2130 return 0;
2131 }
2132
2133 label_buf[0] = '\0';
2134
2135 if (buflen < sizeof(*req))
2136 return 0;
2137
2138 memset(req, 0, sizeof(*req));
2139
2140 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2141 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2142
2143 if (cmd == RTM_NEWNEXTHOP)
2144 req->n.nlmsg_flags |= NLM_F_REPLACE;
2145
2146 req->n.nlmsg_type = cmd;
2147 req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
2148
2149 req->nhm.nh_family = AF_UNSPEC;
2150 /* TODO: Scope? */
2151
2152 if (!nl_attr_put32(&req->n, buflen, NHA_ID, id))
2153 return 0;
2154
2155 if (cmd == RTM_NEWNEXTHOP) {
2156 /*
2157 * We distinguish between a "group", which is a collection
2158 * of ids, and a singleton nexthop with an id. The
2159 * group is installed as an id that just refers to a list of
2160 * other ids.
2161 */
2162 if (dplane_ctx_get_nhe_nh_grp_count(ctx)) {
2163 if (!_netlink_nexthop_build_group(
2164 &req->n, buflen, id,
2165 dplane_ctx_get_nhe_nh_grp(ctx),
2166 dplane_ctx_get_nhe_nh_grp_count(ctx)))
2167 return 0;
2168 } else {
2169 const struct nexthop *nh =
2170 dplane_ctx_get_nhe_ng(ctx)->nexthop;
2171 afi_t afi = dplane_ctx_get_nhe_afi(ctx);
2172
2173 if (afi == AFI_IP)
2174 req->nhm.nh_family = AF_INET;
2175 else if (afi == AFI_IP6)
2176 req->nhm.nh_family = AF_INET6;
2177
2178 switch (nh->type) {
2179 case NEXTHOP_TYPE_IPV4:
2180 case NEXTHOP_TYPE_IPV4_IFINDEX:
2181 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2182 &nh->gate.ipv4,
2183 IPV4_MAX_BYTELEN))
2184 return 0;
2185 break;
2186 case NEXTHOP_TYPE_IPV6:
2187 case NEXTHOP_TYPE_IPV6_IFINDEX:
2188 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2189 &nh->gate.ipv6,
2190 IPV6_MAX_BYTELEN))
2191 return 0;
2192 break;
2193 case NEXTHOP_TYPE_BLACKHOLE:
2194 if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE,
2195 NULL, 0))
2196 return 0;
2197 /* Blackhole shouldn't have anymore attributes
2198 */
2199 goto nexthop_done;
2200 case NEXTHOP_TYPE_IFINDEX:
2201 /* Don't need anymore info for this */
2202 break;
2203 }
2204
2205 if (!nh->ifindex) {
2206 flog_err(
2207 EC_ZEBRA_NHG_FIB_UPDATE,
2208 "Context received for kernel nexthop update without an interface");
2209 return -1;
2210 }
2211
2212 if (!nl_attr_put32(&req->n, buflen, NHA_OIF,
2213 nh->ifindex))
2214 return 0;
2215
2216 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK))
2217 req->nhm.nh_flags |= RTNH_F_ONLINK;
2218
2219 num_labels =
2220 build_label_stack(nh->nh_label, out_lse,
2221 label_buf, sizeof(label_buf));
2222
2223 if (num_labels) {
2224 /* Set the BoS bit */
2225 out_lse[num_labels - 1] |=
2226 htonl(1 << MPLS_LS_S_SHIFT);
2227
2228 /*
2229 * TODO: MPLS unsupported for now in kernel.
2230 */
2231 if (req->nhm.nh_family == AF_MPLS)
2232 goto nexthop_done;
2233 #if 0
2234 if (!nl_attr_put(&req->n, buflen, NHA_NEWDST,
2235 &out_lse,
2236 num_labels
2237 * sizeof(mpls_lse_t)))
2238 return 0;
2239 #endif
2240 else {
2241 struct rtattr *nest;
2242 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
2243
2244 if (!nl_attr_put16(&req->n, buflen,
2245 NHA_ENCAP_TYPE,
2246 encap))
2247 return 0;
2248 nest = nl_attr_nest(&req->n, buflen,
2249 NHA_ENCAP);
2250 if (!nest)
2251 return 0;
2252 if (!nl_attr_put(
2253 &req->n, buflen,
2254 MPLS_IPTUNNEL_DST, &out_lse,
2255 num_labels
2256 * sizeof(
2257 mpls_lse_t)))
2258 return 0;
2259 nl_attr_nest_end(&req->n, nest);
2260 }
2261 }
2262
2263 nexthop_done:
2264
2265 if (IS_ZEBRA_DEBUG_KERNEL)
2266 zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ",
2267 __func__, id, nh, nh->ifindex,
2268 vrf_id_to_name(nh->vrf_id),
2269 nh->vrf_id, label_buf);
2270 }
2271
2272 req->nhm.nh_protocol = zebra2proto(type);
2273
2274 } else if (cmd != RTM_DELNEXTHOP) {
2275 flog_err(
2276 EC_ZEBRA_NHG_FIB_UPDATE,
2277 "Nexthop group kernel update command (%d) does not exist",
2278 cmd);
2279 return -1;
2280 }
2281
2282 if (IS_ZEBRA_DEBUG_KERNEL)
2283 zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd),
2284 id);
2285
2286 return NLMSG_ALIGN(req->n.nlmsg_len);
2287 }
2288
2289 static ssize_t netlink_nexthop_msg_encoder(struct zebra_dplane_ctx *ctx,
2290 void *buf, size_t buflen)
2291 {
2292 enum dplane_op_e op;
2293 int cmd = 0;
2294
2295 op = dplane_ctx_get_op(ctx);
2296 if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE)
2297 cmd = RTM_NEWNEXTHOP;
2298 else if (op == DPLANE_OP_NH_DELETE)
2299 cmd = RTM_DELNEXTHOP;
2300 else {
2301 flog_err(EC_ZEBRA_NHG_FIB_UPDATE,
2302 "Context received for kernel nexthop update with incorrect OP code (%u)",
2303 op);
2304 return -1;
2305 }
2306
2307 return netlink_nexthop_msg_encode(cmd, ctx, buf, buflen);
2308 }
2309
2310 enum netlink_msg_status
2311 netlink_put_nexthop_update_msg(struct nl_batch *bth,
2312 struct zebra_dplane_ctx *ctx)
2313 {
2314 /* Nothing to do if the kernel doesn't support nexthop objects */
2315 if (!kernel_nexthops_supported())
2316 return FRR_NETLINK_SUCCESS;
2317
2318 return netlink_batch_add_msg(bth, ctx, netlink_nexthop_msg_encoder,
2319 false);
2320 }
2321
2322 static ssize_t netlink_newroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2323 void *buf, size_t buflen)
2324 {
2325 return netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, buf,
2326 buflen, false, false);
2327 }
2328
2329 static ssize_t netlink_delroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2330 void *buf, size_t buflen)
2331 {
2332 return netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, buf,
2333 buflen, false, false);
2334 }
2335
2336 enum netlink_msg_status
2337 netlink_put_route_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
2338 {
2339 int cmd;
2340 const struct prefix *p = dplane_ctx_get_dest(ctx);
2341
2342 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
2343 cmd = RTM_DELROUTE;
2344 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
2345 cmd = RTM_NEWROUTE;
2346 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
2347
2348 if (p->family == AF_INET || v6_rr_semantics) {
2349 /* Single 'replace' operation */
2350
2351 /*
2352 * With route replace semantics in place
2353 * for v4 routes and the new route is a system
2354 * route we do not install anything.
2355 * The problem here is that the new system
2356 * route should cause us to withdraw from
2357 * the kernel the old non-system route
2358 */
2359 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))
2360 && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2361 netlink_batch_add_msg(
2362 bth, ctx, netlink_delroute_msg_encoder,
2363 true);
2364 } else {
2365 /*
2366 * So v6 route replace semantics are not in
2367 * the kernel at this point as I understand it.
2368 * so let's do a delete then an add.
2369 * In the future once v6 route replace semantics
2370 * are in we can figure out what to do here to
2371 * allow working with old and new kernels.
2372 *
2373 * I'm also intentionally ignoring the failure case
2374 * of the route delete. If that happens yeah we're
2375 * screwed.
2376 */
2377 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2378 netlink_batch_add_msg(
2379 bth, ctx, netlink_delroute_msg_encoder,
2380 true);
2381 }
2382
2383 cmd = RTM_NEWROUTE;
2384 } else
2385 return FRR_NETLINK_ERROR;
2386
2387 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
2388 return FRR_NETLINK_SUCCESS;
2389
2390 return netlink_batch_add_msg(bth, ctx,
2391 cmd == RTM_NEWROUTE
2392 ? netlink_newroute_msg_encoder
2393 : netlink_delroute_msg_encoder,
2394 false);
2395 }
2396
2397 /**
2398 * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop
2399 *
2400 * @tb: Netlink RTA data
2401 * @family: Address family in the nhmsg
2402 * @ifp: Interface connected - this should be NULL, we fill it in
2403 * @ns_id: Namspace id
2404 *
2405 * Return: New nexthop
2406 */
2407 static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
2408 unsigned char family,
2409 struct interface **ifp,
2410 ns_id_t ns_id)
2411 {
2412 struct nexthop nh = {};
2413 void *gate = NULL;
2414 enum nexthop_types_t type = 0;
2415 int if_index = 0;
2416 size_t sz = 0;
2417 struct interface *ifp_lookup;
2418
2419 if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
2420
2421
2422 if (tb[NHA_GATEWAY]) {
2423 switch (family) {
2424 case AF_INET:
2425 type = NEXTHOP_TYPE_IPV4_IFINDEX;
2426 sz = 4;
2427 break;
2428 case AF_INET6:
2429 type = NEXTHOP_TYPE_IPV6_IFINDEX;
2430 sz = 16;
2431 break;
2432 default:
2433 flog_warn(
2434 EC_ZEBRA_BAD_NHG_MESSAGE,
2435 "Nexthop gateway with bad address family (%d) received from kernel",
2436 family);
2437 return nh;
2438 }
2439 gate = RTA_DATA(tb[NHA_GATEWAY]);
2440 } else
2441 type = NEXTHOP_TYPE_IFINDEX;
2442
2443 if (type)
2444 nh.type = type;
2445
2446 if (gate)
2447 memcpy(&(nh.gate), gate, sz);
2448
2449 if (if_index)
2450 nh.ifindex = if_index;
2451
2452 ifp_lookup =
2453 if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
2454
2455 if (ifp)
2456 *ifp = ifp_lookup;
2457 if (ifp_lookup)
2458 nh.vrf_id = ifp_lookup->vrf_id;
2459 else {
2460 flog_warn(
2461 EC_ZEBRA_UNKNOWN_INTERFACE,
2462 "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
2463 __func__, nh.ifindex);
2464
2465 nh.vrf_id = VRF_DEFAULT;
2466 }
2467
2468 if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
2469 uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
2470 int num_labels = 0;
2471
2472 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
2473
2474 if (encap_type == LWTUNNEL_ENCAP_MPLS)
2475 num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
2476
2477 if (num_labels)
2478 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
2479 labels);
2480 }
2481
2482 return nh;
2483 }
2484
2485 static int netlink_nexthop_process_group(struct rtattr **tb,
2486 struct nh_grp *z_grp, int z_grp_size)
2487 {
2488 uint8_t count = 0;
2489 /* linux/nexthop.h group struct */
2490 struct nexthop_grp *n_grp = NULL;
2491
2492 n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
2493 count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
2494
2495 if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
2496 flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
2497 "Invalid nexthop group received from the kernel");
2498 return count;
2499 }
2500
2501 #if 0
2502 // TODO: Need type for something?
2503 zlog_debug("Nexthop group type: %d",
2504 *((uint16_t *)RTA_DATA(tb[NHA_GROUP_TYPE])));
2505
2506 #endif
2507
2508 for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
2509 z_grp[i].id = n_grp[i].id;
2510 z_grp[i].weight = n_grp[i].weight + 1;
2511 }
2512 return count;
2513 }
2514
2515 /**
2516 * netlink_nexthop_change() - Read in change about nexthops from the kernel
2517 *
2518 * @h: Netlink message header
2519 * @ns_id: Namspace id
2520 * @startup: Are we reading under startup conditions?
2521 *
2522 * Return: Result status
2523 */
2524 int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2525 {
2526 int len;
2527 /* nexthop group id */
2528 uint32_t id;
2529 unsigned char family;
2530 int type;
2531 afi_t afi = AFI_UNSPEC;
2532 vrf_id_t vrf_id = VRF_DEFAULT;
2533 struct interface *ifp = NULL;
2534 struct nhmsg *nhm = NULL;
2535 struct nexthop nh = {};
2536 struct nh_grp grp[MULTIPATH_NUM] = {};
2537 /* Count of nexthops in group array */
2538 uint8_t grp_count = 0;
2539 struct rtattr *tb[NHA_MAX + 1] = {};
2540
2541 nhm = NLMSG_DATA(h);
2542
2543 if (ns_id)
2544 vrf_id = ns_id;
2545
2546 if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
2547 return 0;
2548
2549 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
2550 if (len < 0) {
2551 zlog_warn(
2552 "%s: Message received from netlink is of a broken size %d %zu",
2553 __func__, h->nlmsg_len,
2554 (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
2555 return -1;
2556 }
2557
2558 netlink_parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len);
2559
2560
2561 if (!tb[NHA_ID]) {
2562 flog_warn(
2563 EC_ZEBRA_BAD_NHG_MESSAGE,
2564 "Nexthop group without an ID received from the kernel");
2565 return -1;
2566 }
2567
2568 /* We use the ID key'd nhg table for kernel updates */
2569 id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
2570
2571 if (zebra_evpn_mh_is_fdb_nh(id)) {
2572 /* If this is a L2 NH just ignore it */
2573 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
2574 zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x",
2575 h->nlmsg_type, id);
2576 }
2577 return 0;
2578 }
2579
2580 family = nhm->nh_family;
2581 afi = family2afi(family);
2582
2583 type = proto2zebra(nhm->nh_protocol, 0, true);
2584
2585 if (IS_ZEBRA_DEBUG_KERNEL)
2586 zlog_debug("%s ID (%u) %s NS %u",
2587 nl_msg_type_to_str(h->nlmsg_type), id,
2588 nl_family_to_str(family), ns_id);
2589
2590
2591 if (h->nlmsg_type == RTM_NEWNEXTHOP) {
2592 if (tb[NHA_GROUP]) {
2593 /**
2594 * If this is a group message its only going to have
2595 * an array of nexthop IDs associated with it
2596 */
2597 grp_count = netlink_nexthop_process_group(
2598 tb, grp, array_size(grp));
2599 } else {
2600 if (tb[NHA_BLACKHOLE]) {
2601 /**
2602 * This nexthop is just for blackhole-ing
2603 * traffic, it should not have an OIF, GATEWAY,
2604 * or ENCAP
2605 */
2606 nh.type = NEXTHOP_TYPE_BLACKHOLE;
2607 nh.bh_type = BLACKHOLE_UNSPEC;
2608 } else if (tb[NHA_OIF])
2609 /**
2610 * This is a true new nexthop, so we need
2611 * to parse the gateway and device info
2612 */
2613 nh = netlink_nexthop_process_nh(tb, family,
2614 &ifp, ns_id);
2615 else {
2616
2617 flog_warn(
2618 EC_ZEBRA_BAD_NHG_MESSAGE,
2619 "Invalid Nexthop message received from the kernel with ID (%u)",
2620 id);
2621 return -1;
2622 }
2623 SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
2624 if (nhm->nh_flags & RTNH_F_ONLINK)
2625 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
2626 vrf_id = nh.vrf_id;
2627 }
2628
2629 if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
2630 type, startup))
2631 return -1;
2632
2633 } else if (h->nlmsg_type == RTM_DELNEXTHOP)
2634 zebra_nhg_kernel_del(id, vrf_id);
2635
2636 return 0;
2637 }
2638
2639 /**
2640 * netlink_request_nexthop() - Request nextop information from the kernel
2641 * @zns: Zebra namespace
2642 * @family: AF_* netlink family
2643 * @type: RTM_* route type
2644 *
2645 * Return: Result status
2646 */
2647 static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
2648 {
2649 struct {
2650 struct nlmsghdr n;
2651 struct nhmsg nhm;
2652 } req;
2653
2654 /* Form the request, specifying filter (rtattr) if needed. */
2655 memset(&req, 0, sizeof(req));
2656 req.n.nlmsg_type = type;
2657 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
2658 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2659 req.nhm.nh_family = family;
2660
2661 return netlink_request(&zns->netlink_cmd, &req);
2662 }
2663
2664
2665 /**
2666 * netlink_nexthop_read() - Nexthop read function using netlink interface
2667 *
2668 * @zns: Zebra name space
2669 *
2670 * Return: Result status
2671 * Only called at bootstrap time.
2672 */
2673 int netlink_nexthop_read(struct zebra_ns *zns)
2674 {
2675 int ret;
2676 struct zebra_dplane_info dp_info;
2677
2678 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2679
2680 /* Get nexthop objects */
2681 ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
2682 if (ret < 0)
2683 return ret;
2684 ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
2685 &dp_info, 0, 1);
2686
2687 if (!ret)
2688 /* If we succesfully read in nexthop objects,
2689 * this kernel must support them.
2690 */
2691 supports_nh = true;
2692
2693 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2694 zlog_debug("Nexthop objects %ssupported on this kernel",
2695 supports_nh ? "" : "not ");
2696
2697 return ret;
2698 }
2699
2700
2701 int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
2702 int llalen, ns_id_t ns_id)
2703 {
2704 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
2705 addr, lla, llalen, ns_id);
2706 }
2707
2708 /**
2709 * netlink_neigh_update_msg_encode() - Common helper api for encoding
2710 * evpn neighbor update as netlink messages using dataplane context object.
2711 * Here, a neighbor refers to a bridge forwarding database entry for
2712 * either unicast forwarding or head-end replication or an IP neighbor
2713 * entry.
2714 * @ctx: Dataplane context
2715 * @cmd: Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH)
2716 * @mac: A neighbor cache link layer address
2717 * @ip: A neighbor cache n/w layer destination address
2718 * In the case of bridge FDB, this represnts the remote
2719 * VTEP IP.
2720 * @replace_obj: Whether NEW request should replace existing object or
2721 * add to the end of the list
2722 * @family: AF_* netlink family
2723 * @type: RTN_* route type
2724 * @flags: NTF_* flags
2725 * @state: NUD_* states
2726 * @data: data buffer pointer
2727 * @datalen: total amount of data buffer space
2728 *
2729 * Return: 0 when the msg doesn't fit entirely in the buffer
2730 * otherwise the number of bytes written to buf.
2731 */
2732 static ssize_t netlink_neigh_update_msg_encode(
2733 const struct zebra_dplane_ctx *ctx, int cmd, const struct ethaddr *mac,
2734 const struct ipaddr *ip, bool replace_obj, uint8_t family, uint8_t type,
2735 uint8_t flags, uint16_t state, uint32_t nhg_id, bool nfy,
2736 uint8_t nfy_flags, bool ext, uint32_t ext_flags, void *data,
2737 size_t datalen)
2738 {
2739 uint8_t protocol = RTPROT_ZEBRA;
2740 struct {
2741 struct nlmsghdr n;
2742 struct ndmsg ndm;
2743 char buf[];
2744 } *req = data;
2745 int ipa_len;
2746 enum dplane_op_e op;
2747
2748 if (datalen < sizeof(*req))
2749 return 0;
2750 memset(req, 0, sizeof(*req));
2751
2752 op = dplane_ctx_get_op(ctx);
2753
2754 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2755 req->n.nlmsg_flags = NLM_F_REQUEST;
2756 if (cmd == RTM_NEWNEIGH)
2757 req->n.nlmsg_flags |=
2758 NLM_F_CREATE
2759 | (replace_obj ? NLM_F_REPLACE : NLM_F_APPEND);
2760 req->n.nlmsg_type = cmd;
2761 req->ndm.ndm_family = family;
2762 req->ndm.ndm_type = type;
2763 req->ndm.ndm_state = state;
2764 req->ndm.ndm_flags = flags;
2765 req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
2766
2767 if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol,
2768 sizeof(protocol)))
2769 return 0;
2770
2771 if (mac) {
2772 if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, mac, 6))
2773 return 0;
2774 }
2775
2776 if (nhg_id) {
2777 if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id))
2778 return 0;
2779 }
2780 if (nfy) {
2781 if (!nl_attr_put(&req->n, datalen, NDA_NOTIFY,
2782 &nfy_flags, sizeof(nfy_flags)))
2783 return 0;
2784 }
2785
2786 if (ext) {
2787 if (!nl_attr_put(&req->n, datalen, NDA_EXT_FLAGS, &ext_flags,
2788 sizeof(ext_flags)))
2789 return 0;
2790 }
2791
2792 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2793 if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr, ipa_len))
2794 return 0;
2795
2796 if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) {
2797 vlanid_t vid = dplane_ctx_mac_get_vlan(ctx);
2798
2799 if (vid > 0) {
2800 if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid))
2801 return 0;
2802 }
2803
2804 if (!nl_attr_put32(&req->n, datalen, NDA_MASTER,
2805 dplane_ctx_mac_get_br_ifindex(ctx)))
2806 return 0;
2807 }
2808
2809 return NLMSG_ALIGN(req->n.nlmsg_len);
2810 }
2811
2812 /*
2813 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
2814 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
2815 */
2816 static ssize_t
2817 netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd,
2818 void *buf, size_t buflen)
2819 {
2820 struct ethaddr dst_mac = {.octet = {0}};
2821
2822 return netlink_neigh_update_msg_encode(
2823 ctx, cmd, &dst_mac, dplane_ctx_neigh_get_ipaddr(ctx), false,
2824 PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT), 0 /*nhg*/,
2825 false /*nfy*/, 0 /*nfy_flags*/, false /*ext*/, 0 /*ext_flags*/,
2826 buf, buflen);
2827 }
2828
/*
 * Fallback definition, used only when the included headers do not already
 * provide NDA_RTA: yields a pointer to the first rtattr located right after
 * the (aligned) struct ndmsg that @r points to.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
2833
2834 static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2835 {
2836 struct ndmsg *ndm;
2837 struct interface *ifp;
2838 struct zebra_if *zif;
2839 struct rtattr *tb[NDA_MAX + 1];
2840 struct interface *br_if;
2841 struct ethaddr mac;
2842 vlanid_t vid = 0;
2843 struct in_addr vtep_ip;
2844 int vid_present = 0, dst_present = 0;
2845 char buf[ETHER_ADDR_STRLEN];
2846 char vid_buf[20];
2847 char dst_buf[30];
2848 bool sticky;
2849 bool local_inactive = false;
2850 bool dp_static = false;
2851 uint32_t nhg_id = 0;
2852
2853 ndm = NLMSG_DATA(h);
2854
2855 /* We only process macfdb notifications if EVPN is enabled */
2856 if (!is_evpn_enabled())
2857 return 0;
2858
2859 /* Parse attributes and extract fields of interest. Do basic
2860 * validation of the fields.
2861 */
2862 memset(tb, 0, sizeof tb);
2863 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2864
2865 if (!tb[NDA_LLADDR]) {
2866 if (IS_ZEBRA_DEBUG_KERNEL)
2867 zlog_debug("%s AF_BRIDGE IF %u - no LLADDR",
2868 nl_msg_type_to_str(h->nlmsg_type),
2869 ndm->ndm_ifindex);
2870 return 0;
2871 }
2872
2873 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
2874 if (IS_ZEBRA_DEBUG_KERNEL)
2875 zlog_debug(
2876 "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu",
2877 nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex,
2878 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
2879 return 0;
2880 }
2881
2882 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
2883
2884 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
2885 vid_present = 1;
2886 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
2887 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
2888 }
2889
2890 if (tb[NDA_DST]) {
2891 /* TODO: Only IPv4 supported now. */
2892 dst_present = 1;
2893 memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]),
2894 IPV4_MAX_BYTELEN);
2895 snprintf(dst_buf, sizeof(dst_buf), " dst %s",
2896 inet_ntoa(vtep_ip));
2897 }
2898
2899 if (tb[NDA_NH_ID])
2900 nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]);
2901
2902 if (ndm->ndm_state & NUD_STALE)
2903 local_inactive = true;
2904
2905 if (tb[NDA_NOTIFY]) {
2906 uint8_t nfy_flags;
2907
2908 dp_static = true;
2909 nfy_flags = *(uint8_t *)RTA_DATA(tb[NDA_NOTIFY]);
2910 /* local activity has not been detected on the entry */
2911 if (nfy_flags & (1 << BR_FDB_NFY_INACTIVE))
2912 local_inactive = true;
2913 }
2914
2915 if (IS_ZEBRA_DEBUG_KERNEL)
2916 zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %s%s nhg %d",
2917 nl_msg_type_to_str(h->nlmsg_type),
2918 ndm->ndm_ifindex, vid_present ? vid_buf : "",
2919 ndm->ndm_state, ndm->ndm_flags,
2920 prefix_mac2str(&mac, buf, sizeof(buf)),
2921 dst_present ? dst_buf : "", nhg_id);
2922
2923 /* The interface should exist. */
2924 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
2925 ndm->ndm_ifindex);
2926 if (!ifp || !ifp->info)
2927 return 0;
2928
2929 /* The interface should be something we're interested in. */
2930 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
2931 return 0;
2932
2933 zif = (struct zebra_if *)ifp->info;
2934 if ((br_if = zif->brslave_info.br_if) == NULL) {
2935 if (IS_ZEBRA_DEBUG_KERNEL)
2936 zlog_debug(
2937 "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master",
2938 nl_msg_type_to_str(h->nlmsg_type), ifp->name,
2939 ndm->ndm_ifindex,
2940 zif->brslave_info.bridge_ifindex);
2941 return 0;
2942 }
2943
2944 sticky = !!(ndm->ndm_flags & NTF_STICKY);
2945
2946 if (filter_vlan && vid != filter_vlan) {
2947 if (IS_ZEBRA_DEBUG_KERNEL)
2948 zlog_debug(" Filtered due to filter vlan: %d",
2949 filter_vlan);
2950 return 0;
2951 }
2952
2953 /* If add or update, do accordingly if learnt on a "local" interface; if
2954 * the notification is over VxLAN, this has to be related to
2955 * multi-homing,
2956 * so perform an implicit delete of any local entry (if it exists).
2957 */
2958 if (h->nlmsg_type == RTM_NEWNEIGH) {
2959 /* Drop "permanent" entries. */
2960 if (ndm->ndm_state & NUD_PERMANENT) {
2961 if (IS_ZEBRA_DEBUG_KERNEL)
2962 zlog_debug(
2963 " Dropping entry because of NUD_PERMANENT");
2964 return 0;
2965 }
2966
2967 if (IS_ZEBRA_IF_VXLAN(ifp))
2968 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
2969 vid);
2970
2971 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
2972 sticky, local_inactive, dp_static);
2973 }
2974
2975 /* This is a delete notification.
2976 * Ignore the notification with IP dest as it may just signify that the
2977 * MAC has moved from remote to local. The exception is the special
2978 * all-zeros MAC that represents the BUM flooding entry; we may have
2979 * to readd it. Otherwise,
2980 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
2981 * 2. For a MAC over "local" interface, delete the mac
2982 * Note: We will get notifications from both bridge driver and VxLAN
2983 * driver.
2984 */
2985 if (nhg_id)
2986 return 0;
2987
2988 if (dst_present) {
2989 u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2990
2991 if (!memcmp(zero_mac, mac.octet, ETH_ALEN))
2992 return zebra_vxlan_check_readd_vtep(ifp, vtep_ip);
2993 return 0;
2994 }
2995
2996 if (IS_ZEBRA_IF_VXLAN(ifp))
2997 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
2998 vid);
2999
3000 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
3001 }
3002
3003 static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
3004 {
3005 int len;
3006 struct ndmsg *ndm;
3007
3008 if (h->nlmsg_type != RTM_NEWNEIGH)
3009 return 0;
3010
3011 /* Length validity. */
3012 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3013 if (len < 0)
3014 return -1;
3015
3016 /* We are interested only in AF_BRIDGE notifications. */
3017 ndm = NLMSG_DATA(h);
3018 if (ndm->ndm_family != AF_BRIDGE)
3019 return 0;
3020
3021 return netlink_macfdb_change(h, len, ns_id);
3022 }
3023
3024 /* Request for MAC FDB information from the kernel */
3025 static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
3026 int type, ifindex_t master_ifindex)
3027 {
3028 struct {
3029 struct nlmsghdr n;
3030 struct ifinfomsg ifm;
3031 char buf[256];
3032 } req;
3033
3034 /* Form the request, specifying filter (rtattr) if needed. */
3035 memset(&req, 0, sizeof(req));
3036 req.n.nlmsg_type = type;
3037 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3038 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
3039 req.ifm.ifi_family = family;
3040 if (master_ifindex)
3041 nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
3042
3043 return netlink_request(netlink_cmd, &req);
3044 }
3045
3046 /*
3047 * MAC forwarding database read using netlink interface. This is invoked
3048 * at startup.
3049 */
3050 int netlink_macfdb_read(struct zebra_ns *zns)
3051 {
3052 int ret;
3053 struct zebra_dplane_info dp_info;
3054
3055 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3056
3057 /* Get bridge FDB table. */
3058 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3059 0);
3060 if (ret < 0)
3061 return ret;
3062 /* We are reading entire table. */
3063 filter_vlan = 0;
3064 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3065 &dp_info, 0, 1);
3066
3067 return ret;
3068 }
3069
3070 /*
3071 * MAC forwarding database read using netlink interface. This is for a
3072 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
3073 */
3074 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
3075 struct interface *br_if)
3076 {
3077 struct zebra_if *br_zif;
3078 struct zebra_if *zif;
3079 struct zebra_l2info_vxlan *vxl;
3080 struct zebra_dplane_info dp_info;
3081 int ret = 0;
3082
3083 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3084
3085 /* Save VLAN we're filtering on, if needed. */
3086 br_zif = (struct zebra_if *)br_if->info;
3087 zif = (struct zebra_if *)ifp->info;
3088 vxl = &zif->l2info.vxl;
3089 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
3090 filter_vlan = vxl->access_vlan;
3091
3092 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3093 */
3094 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3095 br_if->ifindex);
3096 if (ret < 0)
3097 return ret;
3098 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3099 &dp_info, 0, 0);
3100
3101 /* Reset VLAN filter. */
3102 filter_vlan = 0;
3103 return ret;
3104 }
3105
3106
3107 /* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
3108 static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
3109 int family,
3110 int type,
3111 struct interface *br_if,
3112 struct ethaddr *mac,
3113 vlanid_t vid)
3114 {
3115 struct {
3116 struct nlmsghdr n;
3117 struct ndmsg ndm;
3118 char buf[256];
3119 } req;
3120 struct zebra_if *br_zif;
3121 char buf[ETHER_ADDR_STRLEN];
3122
3123 memset(&req, 0, sizeof(req));
3124 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3125 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3126 req.n.nlmsg_flags = NLM_F_REQUEST;
3127 req.ndm.ndm_family = family; /* AF_BRIDGE */
3128 /* req.ndm.ndm_state = NUD_REACHABLE; */
3129
3130 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
3131
3132 br_zif = (struct zebra_if *)br_if->info;
3133 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
3134 nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid);
3135
3136 nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
3137
3138 if (IS_ZEBRA_DEBUG_KERNEL)
3139 zlog_debug(
3140 "%s: Tx family %s IF %s(%u) vrf %s(%u) MAC %s vid %u",
3141 __func__, nl_family_to_str(req.ndm.ndm_family),
3142 br_if->name, br_if->ifindex,
3143 vrf_id_to_name(br_if->vrf_id), br_if->vrf_id,
3144 prefix_mac2str(mac, buf, sizeof(buf)), vid);
3145
3146 return netlink_request(&zns->netlink_cmd, &req);
3147 }
3148
3149 int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
3150 struct interface *br_if,
3151 struct ethaddr *mac, vlanid_t vid)
3152 {
3153 int ret = 0;
3154 struct zebra_dplane_info dp_info;
3155
3156 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3157
3158 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3159 */
3160 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
3161 RTM_GETNEIGH,
3162 br_if, mac, vid);
3163 if (ret < 0)
3164 return ret;
3165
3166 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3167 &dp_info, 1, 0);
3168
3169 return ret;
3170 }
3171
3172 /*
3173 * Netlink-specific handler for MAC updates using dataplane context object.
3174 */
3175 ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, void *data,
3176 size_t datalen)
3177 {
3178 struct ipaddr vtep_ip;
3179 vlanid_t vid;
3180 ssize_t total;
3181 int cmd;
3182 uint8_t flags;
3183 uint16_t state;
3184 uint32_t nhg_id;
3185 uint32_t update_flags;
3186 bool nfy = false;
3187 uint8_t nfy_flags = 0;
3188
3189 cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL
3190 ? RTM_NEWNEIGH : RTM_DELNEIGH;
3191
3192 flags = NTF_MASTER;
3193 state = NUD_REACHABLE;
3194
3195 update_flags = dplane_ctx_mac_get_update_flags(ctx);
3196 if (update_flags & DPLANE_MAC_REMOTE) {
3197 flags |= NTF_SELF;
3198 if (dplane_ctx_mac_is_sticky(ctx))
3199 flags |= NTF_STICKY;
3200 else
3201 flags |= NTF_EXT_LEARNED;
3202 /* if it was static-local previously we need to clear the
3203 * notify flags on replace with remote
3204 */
3205 if (update_flags & DPLANE_MAC_WAS_STATIC)
3206 nfy = true;
3207 } else {
3208 /* local mac */
3209 if (update_flags & DPLANE_MAC_SET_STATIC) {
3210 nfy_flags |= (1 << BR_FDB_NFY_STATIC);
3211 state |= NUD_NOARP;
3212 }
3213
3214 if (update_flags & DPLANE_MAC_SET_INACTIVE)
3215 nfy_flags |= (1 << BR_FDB_NFY_INACTIVE);
3216
3217 nfy = true;
3218 }
3219
3220 nhg_id = dplane_ctx_mac_get_nhg_id(ctx);
3221 vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx));
3222 SET_IPADDR_V4(&vtep_ip);
3223
3224 if (IS_ZEBRA_DEBUG_KERNEL) {
3225 char ipbuf[PREFIX_STRLEN];
3226 char buf[ETHER_ADDR_STRLEN];
3227 char vid_buf[20];
3228 const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
3229
3230 vid = dplane_ctx_mac_get_vlan(ctx);
3231 if (vid > 0)
3232 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
3233 else
3234 vid_buf[0] = '\0';
3235
3236 zlog_debug(
3237 "Tx %s family %s IF %s(%u)%s %sMAC %s dst %s nhg %u%s%s%s%s%s",
3238 nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE),
3239 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
3240 vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
3241 prefix_mac2str(mac, buf, sizeof(buf)),
3242 ipaddr2str(&vtep_ip, ipbuf, sizeof(ipbuf)), nhg_id,
3243 (update_flags & DPLANE_MAC_REMOTE) ? " rem" : "",
3244 (update_flags & DPLANE_MAC_WAS_STATIC) ? " clr_sync"
3245 : "",
3246 (update_flags & DPLANE_MAC_SET_STATIC) ? " static" : "",
3247 (update_flags & DPLANE_MAC_SET_INACTIVE) ? " inactive"
3248 : "",
3249 nfy ? " nfy" : "");
3250 }
3251
3252 total = netlink_neigh_update_msg_encode(
3253 ctx, cmd, dplane_ctx_mac_get_addr(ctx), &vtep_ip, true,
3254 AF_BRIDGE, 0, flags, state, nhg_id, nfy, nfy_flags,
3255 false /*ext*/, 0 /*ext_flags*/, data, datalen);
3256
3257 return total;
3258 }
3259
3260 /*
3261 * In the event the kernel deletes ipv4 link-local neighbor entries created for
3262 * 5549 support, re-install them.
3263 */
3264 static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
3265 struct interface *ifp, struct ipaddr *ip,
3266 bool handle_failed)
3267 {
3268 if (ndm->ndm_family != AF_INET)
3269 return;
3270
3271 if (!zif->v6_2_v4_ll_neigh_entry)
3272 return;
3273
3274 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
3275 return;
3276
3277 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
3278 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
3279 ifp->name);
3280 return;
3281 }
3282
3283 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
3284 }
3285
/*
 * Neighbor states regarded as valid for use: netlink_ipneigh_change()
 * handles an RTM_NEWNEIGH carrying any of these bits as an add/update.
 */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE     \
	 | NUD_DELAY)
/*
 * Subset of the above: a valid neighbor with none of these bits set is
 * flagged as locally inactive by netlink_ipneigh_change().
 */
#define NUD_LOCAL_ACTIVE                                                       \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE)
3291
3292 static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
3293 {
3294 struct ndmsg *ndm;
3295 struct interface *ifp;
3296 struct zebra_if *zif;
3297 struct rtattr *tb[NDA_MAX + 1];
3298 struct interface *link_if;
3299 struct ethaddr mac;
3300 struct ipaddr ip;
3301 struct vrf *vrf;
3302 char buf[ETHER_ADDR_STRLEN];
3303 char buf2[INET6_ADDRSTRLEN];
3304 int mac_present = 0;
3305 bool is_ext;
3306 bool is_router;
3307 bool local_inactive;
3308
3309 ndm = NLMSG_DATA(h);
3310
3311 /* The interface should exist. */
3312 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3313 ndm->ndm_ifindex);
3314 if (!ifp || !ifp->info)
3315 return 0;
3316
3317 vrf = vrf_lookup_by_id(ifp->vrf_id);
3318 zif = (struct zebra_if *)ifp->info;
3319
3320 /* Parse attributes and extract fields of interest. */
3321 memset(tb, 0, sizeof(tb));
3322 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
3323
3324 if (!tb[NDA_DST]) {
3325 zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST",
3326 nl_msg_type_to_str(h->nlmsg_type),
3327 nl_family_to_str(ndm->ndm_family), ifp->name,
3328 ndm->ndm_ifindex, VRF_LOGNAME(vrf), ifp->vrf_id);
3329 return 0;
3330 }
3331
3332 memset(&ip, 0, sizeof(struct ipaddr));
3333 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
3334 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
3335
3336 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
3337 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
3338 netlink_handle_5549(ndm, zif, ifp, &ip, false);
3339 if (IS_ZEBRA_DEBUG_KERNEL)
3340 zlog_debug(
3341 "\tNeighbor Entry Received is a 5549 entry, finished");
3342 return 0;
3343 }
3344
3345 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
3346 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
3347 netlink_handle_5549(ndm, zif, ifp, &ip, true);
3348
3349 /* The neighbor is present on an SVI. From this, we locate the
3350 * underlying
3351 * bridge because we're only interested in neighbors on a VxLAN bridge.
3352 * The bridge is located based on the nature of the SVI:
3353 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
3354 * interface
3355 * and is linked to the bridge
3356 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
3357 * inteface
3358 * itself
3359 */
3360 if (IS_ZEBRA_IF_VLAN(ifp)) {
3361 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3362 zif->link_ifindex);
3363 if (!link_if)
3364 return 0;
3365 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
3366 link_if = ifp;
3367 else {
3368 if (IS_ZEBRA_DEBUG_KERNEL)
3369 zlog_debug(
3370 "\tNeighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
3371 return 0;
3372 }
3373
3374 memset(&mac, 0, sizeof(struct ethaddr));
3375 if (h->nlmsg_type == RTM_NEWNEIGH) {
3376 if (tb[NDA_LLADDR]) {
3377 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
3378 if (IS_ZEBRA_DEBUG_KERNEL)
3379 zlog_debug(
3380 "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu",
3381 nl_msg_type_to_str(
3382 h->nlmsg_type),
3383 nl_family_to_str(
3384 ndm->ndm_family),
3385 ifp->name, ndm->ndm_ifindex,
3386 VRF_LOGNAME(vrf), ifp->vrf_id,
3387 (unsigned long)RTA_PAYLOAD(
3388 tb[NDA_LLADDR]));
3389 return 0;
3390 }
3391
3392 mac_present = 1;
3393 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
3394 }
3395
3396 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
3397 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
3398
3399 if (IS_ZEBRA_DEBUG_KERNEL)
3400 zlog_debug(
3401 "Rx %s family %s IF %s(%u) vrf %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
3402 nl_msg_type_to_str(h->nlmsg_type),
3403 nl_family_to_str(ndm->ndm_family), ifp->name,
3404 ndm->ndm_ifindex, VRF_LOGNAME(vrf), ifp->vrf_id,
3405 ipaddr2str(&ip, buf2, sizeof(buf2)),
3406 mac_present
3407 ? prefix_mac2str(&mac, buf, sizeof(buf))
3408 : "",
3409 ndm->ndm_state, ndm->ndm_flags);
3410
3411 /* If the neighbor state is valid for use, process as an add or
3412 * update
3413 * else process as a delete. Note that the delete handling may
3414 * result
3415 * in re-adding the neighbor if it is a valid "remote" neighbor.
3416 */
3417 if (ndm->ndm_state & NUD_VALID) {
3418 local_inactive = !(ndm->ndm_state & NUD_LOCAL_ACTIVE);
3419
3420 /* XXX - populate dp-static based on the sync flags
3421 * in the kernel
3422 */
3423 return zebra_vxlan_handle_kernel_neigh_update(
3424 ifp, link_if, &ip, &mac, ndm->ndm_state,
3425 is_ext, is_router, local_inactive,
3426 false /* dp_static */);
3427 }
3428
3429 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
3430 }
3431
3432 if (IS_ZEBRA_DEBUG_KERNEL)
3433 zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %s",
3434 nl_msg_type_to_str(h->nlmsg_type),
3435 nl_family_to_str(ndm->ndm_family), ifp->name,
3436 ndm->ndm_ifindex, VRF_LOGNAME(vrf), ifp->vrf_id,
3437 ipaddr2str(&ip, buf2, sizeof(buf2)));
3438
3439 /* Process the delete - it may result in re-adding the neighbor if it is
3440 * a valid "remote" neighbor.
3441 */
3442 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
3443 }
3444
3445 static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
3446 {
3447 int len;
3448 struct ndmsg *ndm;
3449
3450 if (h->nlmsg_type != RTM_NEWNEIGH)
3451 return 0;
3452
3453 /* Length validity. */
3454 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3455 if (len < 0)
3456 return -1;
3457
3458 /* We are interested only in AF_INET or AF_INET6 notifications. */
3459 ndm = NLMSG_DATA(h);
3460 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
3461 return 0;
3462
3463 return netlink_neigh_change(h, len);
3464 }
3465
3466 /* Request for IP neighbor information from the kernel */
3467 static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
3468 int type, ifindex_t ifindex)
3469 {
3470 struct {
3471 struct nlmsghdr n;
3472 struct ndmsg ndm;
3473 char buf[256];
3474 } req;
3475
3476 /* Form the request, specifying filter (rtattr) if needed. */
3477 memset(&req, 0, sizeof(req));
3478 req.n.nlmsg_type = type;
3479 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3480 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3481 req.ndm.ndm_family = family;
3482 if (ifindex)
3483 nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
3484
3485 return netlink_request(netlink_cmd, &req);
3486 }
3487
3488 /*
3489 * IP Neighbor table read using netlink interface. This is invoked
3490 * at startup.
3491 */
3492 int netlink_neigh_read(struct zebra_ns *zns)
3493 {
3494 int ret;
3495 struct zebra_dplane_info dp_info;
3496
3497 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3498
3499 /* Get IP neighbor table. */
3500 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
3501 0);
3502 if (ret < 0)
3503 return ret;
3504 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3505 &dp_info, 0, 1);
3506
3507 return ret;
3508 }
3509
3510 /*
3511 * IP Neighbor table read using netlink interface. This is for a specific
3512 * VLAN device.
3513 */
3514 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
3515 {
3516 int ret = 0;
3517 struct zebra_dplane_info dp_info;
3518
3519 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3520
3521 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
3522 vlan_if->ifindex);
3523 if (ret < 0)
3524 return ret;
3525 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3526 &dp_info, 0, 0);
3527
3528 return ret;
3529 }
3530
3531 /*
3532 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
3533 * read using netlink interface.
3534 */
3535 static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
3536 int type, struct ipaddr *ip,
3537 ifindex_t ifindex)
3538 {
3539 struct {
3540 struct nlmsghdr n;
3541 struct ndmsg ndm;
3542 char buf[256];
3543 } req;
3544 int ipa_len;
3545
3546 /* Form the request, specifying filter (rtattr) if needed. */
3547 memset(&req, 0, sizeof(req));
3548 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3549 req.n.nlmsg_flags = NLM_F_REQUEST;
3550 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3551 req.ndm.ndm_ifindex = ifindex;
3552
3553 if (IS_IPADDR_V4(ip)) {
3554 ipa_len = IPV4_MAX_BYTELEN;
3555 req.ndm.ndm_family = AF_INET;
3556
3557 } else {
3558 ipa_len = IPV6_MAX_BYTELEN;
3559 req.ndm.ndm_family = AF_INET6;
3560 }
3561
3562 nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
3563
3564 if (IS_ZEBRA_DEBUG_KERNEL) {
3565 char buf[INET6_ADDRSTRLEN];
3566
3567 zlog_debug("%s: Tx %s family %s IF %u IP %s flags 0x%x",
3568 __func__, nl_msg_type_to_str(type),
3569 nl_family_to_str(req.ndm.ndm_family), ifindex,
3570 ipaddr2str(ip, buf, sizeof(buf)), req.n.nlmsg_flags);
3571 }
3572
3573 return netlink_request(&zns->netlink_cmd, &req);
3574 }
3575
3576 int netlink_neigh_read_specific_ip(struct ipaddr *ip,
3577 struct interface *vlan_if)
3578 {
3579 int ret = 0;
3580 struct zebra_ns *zns;
3581 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vlan_if->vrf_id);
3582 char buf[INET6_ADDRSTRLEN];
3583 struct zebra_dplane_info dp_info;
3584
3585 zns = zvrf->zns;
3586
3587 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3588
3589 if (IS_ZEBRA_DEBUG_KERNEL)
3590 zlog_debug("%s: neigh request IF %s(%u) IP %s vrf %s(%u)",
3591 __func__, vlan_if->name, vlan_if->ifindex,
3592 ipaddr2str(ip, buf, sizeof(buf)),
3593 vrf_id_to_name(vlan_if->vrf_id), vlan_if->vrf_id);
3594
3595 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
3596 vlan_if->ifindex);
3597 if (ret < 0)
3598 return ret;
3599
3600 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3601 &dp_info, 1, 0);
3602
3603 return ret;
3604 }
3605
3606 int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
3607 {
3608 int len;
3609 struct ndmsg *ndm;
3610
3611 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
3612 return 0;
3613
3614 /* Length validity. */
3615 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3616 if (len < 0) {
3617 zlog_err(
3618 "%s: Message received from netlink is of a broken size %d %zu",
3619 __func__, h->nlmsg_len,
3620 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
3621 return -1;
3622 }
3623
3624 /* Is this a notification for the MAC FDB or IP neighbor table? */
3625 ndm = NLMSG_DATA(h);
3626 if (ndm->ndm_family == AF_BRIDGE)
3627 return netlink_macfdb_change(h, len, ns_id);
3628
3629 if (ndm->ndm_type != RTN_UNICAST)
3630 return 0;
3631
3632 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
3633 return netlink_ipneigh_change(h, len, ns_id);
3634 else {
3635 flog_warn(
3636 EC_ZEBRA_UNKNOWN_FAMILY,
3637 "Invalid address family: %u received from kernel neighbor change: %s",
3638 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
3639 return 0;
3640 }
3641
3642 return 0;
3643 }
3644
3645 /*
3646 * Utility neighbor-update function, using info from dplane context.
3647 */
3648 static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
3649 int cmd, void *buf, size_t buflen)
3650 {
3651 const struct ipaddr *ip;
3652 const struct ethaddr *mac;
3653 uint8_t flags;
3654 uint16_t state;
3655 uint8_t family;
3656 uint32_t update_flags;
3657 uint32_t ext_flags = 0;
3658 bool ext = false;
3659
3660 ip = dplane_ctx_neigh_get_ipaddr(ctx);
3661 mac = dplane_ctx_neigh_get_mac(ctx);
3662 if (is_zero_mac(mac))
3663 mac = NULL;
3664
3665 update_flags = dplane_ctx_neigh_get_update_flags(ctx);
3666 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
3667 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
3668
3669 family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
3670
3671 if (update_flags & DPLANE_NEIGH_REMOTE) {
3672 flags |= NTF_EXT_LEARNED;
3673 /* if it was static-local previously we need to clear the
3674 * ext flags on replace with remote
3675 */
3676 if (update_flags & DPLANE_NEIGH_WAS_STATIC)
3677 ext = true;
3678 } else {
3679 ext = true;
3680 /* local neigh */
3681 if (update_flags & DPLANE_NEIGH_SET_STATIC)
3682 ext_flags |= NTF_E_MH_PEER_SYNC;
3683
3684 /* the ndm_state set for local entries can be REACHABLE or
3685 * STALE. if the dataplane has already establish reachability
3686 * (in the meantime) FRR must not over-write it with STALE.
3687 * this accidental race/over-write is avoided by using the
3688 * WEAK_OVERRIDE_STATE
3689 */
3690 ext_flags |= NTF_E_WEAK_OVERRIDE_STATE;
3691 }
3692 if (IS_ZEBRA_DEBUG_KERNEL) {
3693 char buf[INET6_ADDRSTRLEN];
3694 char buf2[ETHER_ADDR_STRLEN];
3695
3696 zlog_debug(
3697 "Tx %s family %s IF %s(%u) Neigh %s MAC %s flags 0x%x state 0x%x",
3698 nl_msg_type_to_str(cmd), nl_family_to_str(family),
3699 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
3700 ipaddr2str(ip, buf, sizeof(buf)),
3701 mac ? prefix_mac2str(mac, buf2, sizeof(buf2)) : "null",
3702 flags, state);
3703 }
3704
3705 return netlink_neigh_update_msg_encode(
3706 ctx, cmd, mac, ip, true, family, RTN_UNICAST, flags, state,
3707 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, ext, ext_flags, buf,
3708 buflen);
3709 }
3710
3711 static ssize_t netlink_neigh_msg_encoder(struct zebra_dplane_ctx *ctx,
3712 void *buf, size_t buflen)
3713 {
3714 ssize_t ret;
3715
3716 switch (dplane_ctx_get_op(ctx)) {
3717 case DPLANE_OP_NEIGH_INSTALL:
3718 case DPLANE_OP_NEIGH_UPDATE:
3719 case DPLANE_OP_NEIGH_DISCOVER:
3720 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH, buf, buflen);
3721 break;
3722 case DPLANE_OP_NEIGH_DELETE:
3723 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH, buf, buflen);
3724 break;
3725 case DPLANE_OP_VTEP_ADD:
3726 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH, buf,
3727 buflen);
3728 break;
3729 case DPLANE_OP_VTEP_DELETE:
3730 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH, buf,
3731 buflen);
3732 break;
3733 default:
3734 ret = -1;
3735 }
3736
3737 return ret;
3738 }
3739
3740 /*
3741 * Update MAC, using dataplane context object.
3742 */
3743
3744 enum netlink_msg_status netlink_put_mac_update_msg(struct nl_batch *bth,
3745 struct zebra_dplane_ctx *ctx)
3746 {
3747 return netlink_batch_add_msg(bth, ctx, netlink_macfdb_update_ctx,
3748 false);
3749 }
3750
3751 enum netlink_msg_status
3752 netlink_put_neigh_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
3753 {
3754 return netlink_batch_add_msg(bth, ctx, netlink_neigh_msg_encoder,
3755 false);
3756 }
3757
3758 /*
3759 * MPLS label forwarding table change via netlink interface, using dataplane
3760 * context information.
3761 */
3762 ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
3763 void *buf, size_t buflen)
3764 {
3765 mpls_lse_t lse;
3766 const struct nhlfe_list_head *head;
3767 const zebra_nhlfe_t *nhlfe;
3768 struct nexthop *nexthop = NULL;
3769 unsigned int nexthop_num;
3770 const char *routedesc;
3771 int route_type;
3772 struct prefix p = {0};
3773
3774 struct {
3775 struct nlmsghdr n;
3776 struct rtmsg r;
3777 char buf[0];
3778 } *req = buf;
3779
3780 if (buflen < sizeof(*req))
3781 return 0;
3782
3783 memset(req, 0, sizeof(*req));
3784
3785 /*
3786 * Count # nexthops so we can decide whether to use singlepath
3787 * or multipath case.
3788 */
3789 nexthop_num = 0;
3790 head = dplane_ctx_get_nhlfe_list(ctx);
3791 frr_each(nhlfe_list_const, head, nhlfe) {
3792 nexthop = nhlfe->nexthop;
3793 if (!nexthop)
3794 continue;
3795 if (cmd == RTM_NEWROUTE) {
3796 /* Count all selected NHLFEs */
3797 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3798 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
3799 nexthop_num++;
3800 } else { /* DEL */
3801 /* Count all installed NHLFEs */
3802 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
3803 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
3804 nexthop_num++;
3805 }
3806 }
3807
3808 if ((nexthop_num == 0) ||
3809 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
3810 return 0;
3811
3812 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
3813 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
3814 req->n.nlmsg_type = cmd;
3815 req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
3816
3817 req->r.rtm_family = AF_MPLS;
3818 req->r.rtm_table = RT_TABLE_MAIN;
3819 req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
3820 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
3821 req->r.rtm_type = RTN_UNICAST;
3822
3823 if (cmd == RTM_NEWROUTE) {
3824 /* We do a replace to handle update. */
3825 req->n.nlmsg_flags |= NLM_F_REPLACE;
3826
3827 /* set the protocol value if installing */
3828 route_type = re_type_from_lsp_type(
3829 dplane_ctx_get_best_nhlfe(ctx)->type);
3830 req->r.rtm_protocol = zebra2proto(route_type);
3831 }
3832
3833 /* Fill destination */
3834 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
3835 if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t)))
3836 return 0;
3837
3838 /* Fill nexthops (paths) based on single-path or multipath. The paths
3839 * chosen depend on the operation.
3840 */
3841 if (nexthop_num == 1) {
3842 routedesc = "single-path";
3843 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
3844 routedesc);
3845
3846 nexthop_num = 0;
3847 frr_each(nhlfe_list_const, head, nhlfe) {
3848 nexthop = nhlfe->nexthop;
3849 if (!nexthop)
3850 continue;
3851
3852 if ((cmd == RTM_NEWROUTE
3853 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3854 && CHECK_FLAG(nexthop->flags,
3855 NEXTHOP_FLAG_ACTIVE)))
3856 || (cmd == RTM_DELROUTE
3857 && (CHECK_FLAG(nhlfe->flags,
3858 NHLFE_FLAG_INSTALLED)
3859 && CHECK_FLAG(nexthop->flags,
3860 NEXTHOP_FLAG_FIB)))) {
3861 /* Add the gateway */
3862 if (!_netlink_mpls_build_singlepath(
3863 &p, routedesc, nhlfe, &req->n,
3864 &req->r, buflen, cmd))
3865 return false;
3866
3867 nexthop_num++;
3868 break;
3869 }
3870 }
3871 } else { /* Multipath case */
3872 struct rtattr *nest;
3873 const union g_addr *src1 = NULL;
3874
3875 nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH);
3876 if (!nest)
3877 return 0;
3878
3879 routedesc = "multipath";
3880 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
3881 routedesc);
3882
3883 nexthop_num = 0;
3884 frr_each(nhlfe_list_const, head, nhlfe) {
3885 nexthop = nhlfe->nexthop;
3886 if (!nexthop)
3887 continue;
3888
3889 if ((cmd == RTM_NEWROUTE
3890 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3891 && CHECK_FLAG(nexthop->flags,
3892 NEXTHOP_FLAG_ACTIVE)))
3893 || (cmd == RTM_DELROUTE
3894 && (CHECK_FLAG(nhlfe->flags,
3895 NHLFE_FLAG_INSTALLED)
3896 && CHECK_FLAG(nexthop->flags,
3897 NEXTHOP_FLAG_FIB)))) {
3898 nexthop_num++;
3899
3900 /* Build the multipath */
3901 if (!_netlink_mpls_build_multipath(
3902 &p, routedesc, nhlfe, &req->n,
3903 buflen, &req->r, &src1))
3904 return 0;
3905 }
3906 }
3907
3908 /* Add the multipath */
3909 nl_attr_nest_end(&req->n, nest);
3910 }
3911
3912 return NLMSG_ALIGN(req->n.nlmsg_len);
3913 }
3914
3915 /****************************************************************************
3916 * This code was developed in a branch that didn't have dplane APIs for
3917 * MAC updates. Hence the use of the legacy style. It will be moved to
3918 * the new dplane style pre-merge to master. XXX
3919 */
3920 static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip)
3921 {
3922 struct {
3923 struct nlmsghdr n;
3924 struct nhmsg nhm;
3925 char buf[256];
3926 } req;
3927 int cmd = RTM_NEWNEXTHOP;
3928 struct zebra_vrf *zvrf;
3929 struct zebra_ns *zns;
3930
3931 zvrf = zebra_vrf_get_evpn();
3932 if (!zvrf)
3933 return -1;
3934 zns = zvrf->zns;
3935
3936 memset(&req, 0, sizeof(req));
3937
3938 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3939 req.n.nlmsg_flags = NLM_F_REQUEST;
3940 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
3941 req.n.nlmsg_type = cmd;
3942 req.nhm.nh_family = AF_INET;
3943
3944 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
3945 return -1;
3946 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
3947 return -1;
3948 if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY,
3949 &vtep_ip, IPV4_MAX_BYTELEN))
3950 return -1;
3951
3952 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3953 zlog_debug("Tx %s fdb-nh 0x%x %s",
3954 nl_msg_type_to_str(cmd), nh_id, inet_ntoa(vtep_ip));
3955 }
3956
3957 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
3958 0);
3959 }
3960
3961 static int netlink_fdb_nh_del(uint32_t nh_id)
3962 {
3963 struct {
3964 struct nlmsghdr n;
3965 struct nhmsg nhm;
3966 char buf[256];
3967 } req;
3968 int cmd = RTM_DELNEXTHOP;
3969 struct zebra_vrf *zvrf;
3970 struct zebra_ns *zns;
3971
3972 zvrf = zebra_vrf_get_evpn();
3973 if (!zvrf)
3974 return -1;
3975 zns = zvrf->zns;
3976
3977 memset(&req, 0, sizeof(req));
3978
3979 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3980 req.n.nlmsg_flags = NLM_F_REQUEST;
3981 req.n.nlmsg_type = cmd;
3982 req.nhm.nh_family = AF_UNSPEC;
3983
3984 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
3985 return -1;
3986
3987 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3988 zlog_debug("Tx %s fdb-nh 0x%x",
3989 nl_msg_type_to_str(cmd), nh_id);
3990 }
3991
3992 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
3993 0);
3994 }
3995
3996 static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt,
3997 struct nh_grp *nh_ids)
3998 {
3999 struct {
4000 struct nlmsghdr n;
4001 struct nhmsg nhm;
4002 char buf[256];
4003 } req;
4004 int cmd = RTM_NEWNEXTHOP;
4005 struct zebra_vrf *zvrf;
4006 struct zebra_ns *zns;
4007 struct nexthop_grp grp[nh_cnt];
4008 uint32_t i;
4009
4010 zvrf = zebra_vrf_get_evpn();
4011 if (!zvrf)
4012 return -1;
4013 zns = zvrf->zns;
4014
4015 memset(&req, 0, sizeof(req));
4016
4017 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4018 req.n.nlmsg_flags = NLM_F_REQUEST;
4019 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4020 req.n.nlmsg_type = cmd;
4021 req.nhm.nh_family = AF_UNSPEC;
4022
4023 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id))
4024 return -1;
4025 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4026 return -1;
4027 memset(&grp, 0, sizeof(grp));
4028 for (i = 0; i < nh_cnt; ++i) {
4029 grp[i].id = nh_ids[i].id;
4030 grp[i].weight = nh_ids[i].weight;
4031 }
4032 if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP,
4033 grp, nh_cnt * sizeof(struct nexthop_grp)))
4034 return -1;
4035
4036
4037 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4038 char vtep_str[ES_VTEP_LIST_STR_SZ];
4039 char nh_buf[16];
4040
4041 vtep_str[0] = '\0';
4042 for (i = 0; i < nh_cnt; ++i) {
4043 snprintf(nh_buf, sizeof(nh_buf), "%u ",
4044 grp[i].id);
4045 strlcat(vtep_str, nh_buf, sizeof(vtep_str));
4046 }
4047
4048 zlog_debug("Tx %s fdb-nhg 0x%x %s",
4049 nl_msg_type_to_str(cmd), nhg_id, vtep_str);
4050 }
4051
4052 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4053 0);
4054 }
4055
/*
 * Delete the FDB nexthop group with id 'nhg_id'. A group is removed with
 * the same request as a single FDB nexthop.
 */
static int netlink_fdb_nhg_del(uint32_t nhg_id)
{
	int rc;

	rc = netlink_fdb_nh_del(nhg_id);

	return rc;
}
4060
/* Install or update the FDB nexthop 'nh_id' pointing at 'vtep_ip'. */
int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
{
	int rc;

	rc = netlink_fdb_nh_update(nh_id, vtep_ip);

	return rc;
}
4065
/* Remove the FDB nexthop 'nh_id'. */
int kernel_del_mac_nh(uint32_t nh_id)
{
	int rc;

	rc = netlink_fdb_nh_del(nh_id);

	return rc;
}
4070
/*
 * Install or update the FDB nexthop group 'nhg_id' from the 'nh_cnt'
 * entries in 'nh_ids'.
 */
int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
		       struct nh_grp *nh_ids)
{
	int rc;

	rc = netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids);

	return rc;
}
4076
/* Remove the FDB nexthop group 'nhg_id'. */
int kernel_del_mac_nhg(uint32_t nhg_id)
{
	int rc;

	rc = netlink_fdb_nhg_del(nhg_id);

	return rc;
}
4081
4082 #endif /* HAVE_NETLINK */