]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
Merge pull request #12679 from spk-hebbar/master
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 /* The following definition is to workaround an issue in the Linux kernel
26 * header files with redefinition of 'struct in6_addr' in both
27 * netinet/in.h and linux/in6.h.
28 * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html
29 */
30 #define _LINUX_IN6_H
31
32 #include <net/if_arp.h>
33 #include <linux/lwtunnel.h>
34 #include <linux/mpls_iptunnel.h>
35 #include <linux/seg6_iptunnel.h>
36 #include <linux/seg6_local.h>
37 #include <linux/neighbour.h>
38 #include <linux/rtnetlink.h>
39 #include <linux/nexthop.h>
40
41 /* Hack for GNU libc version 2. */
42 #ifndef MSG_TRUNC
43 #define MSG_TRUNC 0x20
44 #endif /* MSG_TRUNC */
45
46 #include "linklist.h"
47 #include "if.h"
48 #include "log.h"
49 #include "prefix.h"
50 #include "plist.h"
51 #include "plist_int.h"
52 #include "connected.h"
53 #include "table.h"
54 #include "memory.h"
55 #include "rib.h"
56 #include "thread.h"
57 #include "privs.h"
58 #include "nexthop.h"
59 #include "vrf.h"
60 #include "vty.h"
61 #include "mpls.h"
62 #include "vxlan.h"
63 #include "printfrr.h"
64
65 #include "zebra/zapi_msg.h"
66 #include "zebra/zebra_ns.h"
67 #include "zebra/zebra_vrf.h"
68 #include "zebra/rt.h"
69 #include "zebra/redistribute.h"
70 #include "zebra/interface.h"
71 #include "zebra/debug.h"
72 #include "zebra/rtadv.h"
73 #include "zebra/zebra_ptm.h"
74 #include "zebra/zebra_mpls.h"
75 #include "zebra/kernel_netlink.h"
76 #include "zebra/rt_netlink.h"
77 #include "zebra/zebra_nhg.h"
78 #include "zebra/zebra_mroute.h"
79 #include "zebra/zebra_vxlan.h"
80 #include "zebra/zebra_errors.h"
81 #include "zebra/zebra_evpn_mh.h"
82 #include "zebra/zebra_trace.h"
83 #include "zebra/zebra_neigh.h"
84
85 #ifndef AF_MPLS
86 #define AF_MPLS 28
87 #endif
88
89 /* Re-defining as I am unable to include <linux/if_bridge.h> which has the
90 * UAPI for MAC sync. */
91 #ifndef _UAPI_LINUX_IF_BRIDGE_H
92 #define BR_SPH_LIST_SIZE 10
93 #endif
94
/* VLAN id filter; its users are not visible in this part of the file
 * (NOTE(review): presumably consumed by the bridge FDB dump requests -
 * confirm).
 */
static vlanid_t filter_vlan = 0;

/* We capture whether the current kernel supports nexthop ids; by
 * default, we'll use them if possible. There's also a configuration
 * available to _disable_ use of kernel nexthops.
 */
static bool supports_nh;

/* Family-tagged gateway address. NOTE(review): the field order suggests
 * it mirrors a kernel attribute payload layout - confirm against the
 * code that fills it in.
 */
struct gw_family_t {
	uint16_t filler;
	uint16_t family;
	union g_addr gate;
};

/* Text form of the IPv4 link-local address and its binary form; the
 * binary form is filled in once by rt_netlink_init().
 */
static const char ipv4_ll_buf[16] = "169.254.0.1";
static struct in_addr ipv4_ll;
111
112 /* Is this a ipv4 over ipv6 route? */
113 static bool is_route_v4_over_v6(unsigned char rtm_family,
114 enum nexthop_types_t nexthop_type)
115 {
116 if (rtm_family == AF_INET
117 && (nexthop_type == NEXTHOP_TYPE_IPV6
118 || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX))
119 return true;
120
121 return false;
122 }
123
124 /* Helper to control use of kernel-level nexthop ids */
125 static bool kernel_nexthops_supported(void)
126 {
127 return (supports_nh && !vrf_is_backend_netns()
128 && zebra_nhg_kernel_nexthops_enabled());
129 }
130
/*
 * Some deployments only want the nexthop groups created by protocols,
 * not the ones Zebra creates implicitly. This is a thin local wrapper
 * around the zebra_nhg query for that setting.
 */
static bool proto_nexthops_only(void)
{
	const bool proto_only = zebra_nhg_proto_nexthops_only();

	return proto_only;
}
139
140 /* Is this a proto created NHG? */
141 static bool is_proto_nhg(uint32_t id, int type)
142 {
143 /* If type is available, use it as the source of truth */
144 if (type) {
145 if (type != ZEBRA_ROUTE_NHG)
146 return true;
147 return false;
148 }
149
150 if (id >= ZEBRA_NHG_PROTO_LOWER)
151 return true;
152
153 return false;
154 }
155
156 /*
157 * The ipv4_ll data structure is used for all 5549
158 * additions to the kernel. Let's figure out the
159 * correct value one time instead for every
160 * install/remove of a 5549 type route
161 */
162 void rt_netlink_init(void)
163 {
164 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
165 }
166
167 /*
168 * Mapping from dataplane neighbor flags to netlink flags
169 */
170 static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
171 {
172 uint8_t flags = 0;
173
174 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
175 flags |= NTF_EXT_LEARNED;
176 if (dplane_flags & DPLANE_NTF_ROUTER)
177 flags |= NTF_ROUTER;
178 if (dplane_flags & DPLANE_NTF_USE)
179 flags |= NTF_USE;
180
181 return flags;
182 }
183
184 /*
185 * Mapping from dataplane neighbor state to netlink state
186 */
187 static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
188 {
189 uint16_t state = 0;
190
191 if (dplane_state & DPLANE_NUD_REACHABLE)
192 state |= NUD_REACHABLE;
193 if (dplane_state & DPLANE_NUD_STALE)
194 state |= NUD_STALE;
195 if (dplane_state & DPLANE_NUD_NOARP)
196 state |= NUD_NOARP;
197 if (dplane_state & DPLANE_NUD_PROBE)
198 state |= NUD_PROBE;
199 if (dplane_state & DPLANE_NUD_INCOMPLETE)
200 state |= NUD_INCOMPLETE;
201 if (dplane_state & DPLANE_NUD_PERMANENT)
202 state |= NUD_PERMANENT;
203 if (dplane_state & DPLANE_NUD_FAILED)
204 state |= NUD_FAILED;
205
206 return state;
207 }
208
209
210 static inline bool is_selfroute(int proto)
211 {
212 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
213 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
214 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
215 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
216 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
217 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
218 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)
219 || (proto == RTPROT_SRTE)) {
220 return true;
221 }
222
223 return false;
224 }
225
226 int zebra2proto(int proto)
227 {
228 switch (proto) {
229 case ZEBRA_ROUTE_BABEL:
230 proto = RTPROT_BABEL;
231 break;
232 case ZEBRA_ROUTE_BGP:
233 proto = RTPROT_BGP;
234 break;
235 case ZEBRA_ROUTE_OSPF:
236 case ZEBRA_ROUTE_OSPF6:
237 proto = RTPROT_OSPF;
238 break;
239 case ZEBRA_ROUTE_STATIC:
240 proto = RTPROT_ZSTATIC;
241 break;
242 case ZEBRA_ROUTE_ISIS:
243 proto = RTPROT_ISIS;
244 break;
245 case ZEBRA_ROUTE_RIP:
246 proto = RTPROT_RIP;
247 break;
248 case ZEBRA_ROUTE_RIPNG:
249 proto = RTPROT_RIPNG;
250 break;
251 case ZEBRA_ROUTE_NHRP:
252 proto = RTPROT_NHRP;
253 break;
254 case ZEBRA_ROUTE_EIGRP:
255 proto = RTPROT_EIGRP;
256 break;
257 case ZEBRA_ROUTE_LDP:
258 proto = RTPROT_LDP;
259 break;
260 case ZEBRA_ROUTE_SHARP:
261 proto = RTPROT_SHARP;
262 break;
263 case ZEBRA_ROUTE_PBR:
264 proto = RTPROT_PBR;
265 break;
266 case ZEBRA_ROUTE_OPENFABRIC:
267 proto = RTPROT_OPENFABRIC;
268 break;
269 case ZEBRA_ROUTE_SRTE:
270 proto = RTPROT_SRTE;
271 break;
272 case ZEBRA_ROUTE_TABLE:
273 case ZEBRA_ROUTE_NHG:
274 proto = RTPROT_ZEBRA;
275 break;
276 case ZEBRA_ROUTE_CONNECT:
277 case ZEBRA_ROUTE_KERNEL:
278 proto = RTPROT_KERNEL;
279 break;
280 default:
281 /*
282 * When a user adds a new protocol this will show up
283 * to let them know to do something about it. This
284 * is intentionally a warn because we should see
285 * this as part of development of a new protocol
286 */
287 zlog_debug(
288 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
289 __func__, proto);
290 proto = RTPROT_ZEBRA;
291 break;
292 }
293
294 return proto;
295 }
296
297 static inline int proto2zebra(int proto, int family, bool is_nexthop)
298 {
299 switch (proto) {
300 case RTPROT_BABEL:
301 proto = ZEBRA_ROUTE_BABEL;
302 break;
303 case RTPROT_BGP:
304 proto = ZEBRA_ROUTE_BGP;
305 break;
306 case RTPROT_OSPF:
307 proto = (family == AF_INET) ? ZEBRA_ROUTE_OSPF
308 : ZEBRA_ROUTE_OSPF6;
309 break;
310 case RTPROT_ISIS:
311 proto = ZEBRA_ROUTE_ISIS;
312 break;
313 case RTPROT_RIP:
314 proto = ZEBRA_ROUTE_RIP;
315 break;
316 case RTPROT_RIPNG:
317 proto = ZEBRA_ROUTE_RIPNG;
318 break;
319 case RTPROT_NHRP:
320 proto = ZEBRA_ROUTE_NHRP;
321 break;
322 case RTPROT_EIGRP:
323 proto = ZEBRA_ROUTE_EIGRP;
324 break;
325 case RTPROT_LDP:
326 proto = ZEBRA_ROUTE_LDP;
327 break;
328 case RTPROT_STATIC:
329 case RTPROT_ZSTATIC:
330 proto = ZEBRA_ROUTE_STATIC;
331 break;
332 case RTPROT_SHARP:
333 proto = ZEBRA_ROUTE_SHARP;
334 break;
335 case RTPROT_PBR:
336 proto = ZEBRA_ROUTE_PBR;
337 break;
338 case RTPROT_OPENFABRIC:
339 proto = ZEBRA_ROUTE_OPENFABRIC;
340 break;
341 case RTPROT_SRTE:
342 proto = ZEBRA_ROUTE_SRTE;
343 break;
344 case RTPROT_UNSPEC:
345 case RTPROT_REDIRECT:
346 case RTPROT_KERNEL:
347 case RTPROT_BOOT:
348 case RTPROT_GATED:
349 case RTPROT_RA:
350 case RTPROT_MRT:
351 case RTPROT_BIRD:
352 case RTPROT_DNROUTED:
353 case RTPROT_XORP:
354 case RTPROT_NTK:
355 case RTPROT_MROUTED:
356 case RTPROT_KEEPALIVED:
357 case RTPROT_OPENR:
358 proto = ZEBRA_ROUTE_KERNEL;
359 break;
360 case RTPROT_ZEBRA:
361 if (is_nexthop) {
362 proto = ZEBRA_ROUTE_NHG;
363 break;
364 }
365 /* Intentional fall thru */
366 default:
367 /*
368 * When a user adds a new protocol this will show up
369 * to let them know to do something about it. This
370 * is intentionally a warn because we should see
371 * this as part of development of a new protocol
372 */
373 zlog_debug(
374 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
375 __func__, proto);
376 proto = ZEBRA_ROUTE_KERNEL;
377 break;
378 }
379 return proto;
380 }
381
382 /*
383 Pending: create an efficient table_id (in a tree/hash) based lookup)
384 */
385 vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
386 {
387 struct vrf *vrf;
388 struct zebra_vrf *zvrf;
389
390 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
391 zvrf = vrf->info;
392 if (zvrf == NULL)
393 continue;
394 /* case vrf with netns : match the netnsid */
395 if (vrf_is_backend_netns()) {
396 if (ns_id == zvrf_id(zvrf))
397 return zvrf_id(zvrf);
398 } else {
399 /* VRF is VRF_BACKEND_VRF_LITE */
400 if (zvrf->table_id != table_id)
401 continue;
402 return zvrf_id(zvrf);
403 }
404 }
405
406 return VRF_DEFAULT;
407 }
408
409 /**
410 * @parse_encap_mpls() - Parses encapsulated mpls attributes
411 * @tb: Pointer to rtattr to look for nested items in.
412 * @labels: Pointer to store labels in.
413 *
414 * Return: Number of mpls labels found.
415 */
416 static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
417 {
418 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
419 mpls_lse_t *lses = NULL;
420 int num_labels = 0;
421 uint32_t ttl = 0;
422 uint32_t bos = 0;
423 uint32_t exp = 0;
424 mpls_label_t label = 0;
425
426 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
427 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
428 while (!bos && num_labels < MPLS_MAX_LABELS) {
429 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
430 labels[num_labels++] = label;
431 }
432
433 return num_labels;
434 }
435
436 static enum seg6local_action_t
437 parse_encap_seg6local(struct rtattr *tb,
438 struct seg6local_context *ctx)
439 {
440 struct rtattr *tb_encap[SEG6_LOCAL_MAX + 1] = {};
441 enum seg6local_action_t act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
442
443 netlink_parse_rtattr_nested(tb_encap, SEG6_LOCAL_MAX, tb);
444
445 if (tb_encap[SEG6_LOCAL_ACTION])
446 act = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_ACTION]);
447
448 if (tb_encap[SEG6_LOCAL_NH4])
449 ctx->nh4 = *(struct in_addr *)RTA_DATA(
450 tb_encap[SEG6_LOCAL_NH4]);
451
452 if (tb_encap[SEG6_LOCAL_NH6])
453 ctx->nh6 = *(struct in6_addr *)RTA_DATA(
454 tb_encap[SEG6_LOCAL_NH6]);
455
456 if (tb_encap[SEG6_LOCAL_TABLE])
457 ctx->table = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_TABLE]);
458
459 if (tb_encap[SEG6_LOCAL_VRFTABLE])
460 ctx->table =
461 *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_VRFTABLE]);
462
463 return act;
464 }
465
/*
 * Parse a nested seg6 encapsulation attribute.
 *
 * When a SEG6_IPTUNNEL_SRH sub-attribute is present, the first segment
 * of its first routing header is copied to *segs and 1 is returned;
 * otherwise *segs is left untouched and 0 is returned.
 *
 * TODO: multiple SID lists are not supported.
 */
static int parse_encap_seg6(struct rtattr *tb, struct in6_addr *segs)
{
	struct rtattr *attrs[SEG6_IPTUNNEL_MAX + 1] = {};
	struct seg6_iptunnel_encap *encap;

	netlink_parse_rtattr_nested(attrs, SEG6_IPTUNNEL_MAX, tb);

	if (!attrs[SEG6_IPTUNNEL_SRH])
		return 0;

	encap = (struct seg6_iptunnel_encap *)RTA_DATA(
		attrs[SEG6_IPTUNNEL_SRH]);
	*segs = encap->srh[0].segments[0];

	return 1;
}
487
488
489 static struct nexthop
490 parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
491 enum blackhole_type bh_type, int index, void *prefsrc,
492 void *gate, afi_t afi, vrf_id_t vrf_id)
493 {
494 struct interface *ifp = NULL;
495 struct nexthop nh = {0};
496 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
497 int num_labels = 0;
498 enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
499 struct seg6local_context seg6l_ctx = {};
500 struct in6_addr seg6_segs = {};
501 int num_segs = 0;
502
503 vrf_id_t nh_vrf_id = vrf_id;
504 size_t sz = (afi == AFI_IP) ? 4 : 16;
505
506 if (bh_type == BLACKHOLE_UNSPEC) {
507 if (index && !gate)
508 nh.type = NEXTHOP_TYPE_IFINDEX;
509 else if (index && gate)
510 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
511 : NEXTHOP_TYPE_IPV6_IFINDEX;
512 else if (!index && gate)
513 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
514 : NEXTHOP_TYPE_IPV6;
515 else {
516 nh.type = NEXTHOP_TYPE_BLACKHOLE;
517 nh.bh_type = bh_type;
518 }
519 } else {
520 nh.type = NEXTHOP_TYPE_BLACKHOLE;
521 nh.bh_type = bh_type;
522 }
523 nh.ifindex = index;
524 if (prefsrc)
525 memcpy(&nh.src, prefsrc, sz);
526 if (gate)
527 memcpy(&nh.gate, gate, sz);
528
529 if (index) {
530 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
531 if (ifp)
532 nh_vrf_id = ifp->vrf->vrf_id;
533 }
534 nh.vrf_id = nh_vrf_id;
535
536 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
537 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
538 == LWTUNNEL_ENCAP_MPLS) {
539 num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
540 }
541 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
542 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
543 == LWTUNNEL_ENCAP_SEG6_LOCAL) {
544 seg6l_act = parse_encap_seg6local(tb[RTA_ENCAP], &seg6l_ctx);
545 }
546 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
547 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
548 == LWTUNNEL_ENCAP_SEG6) {
549 num_segs = parse_encap_seg6(tb[RTA_ENCAP], &seg6_segs);
550 }
551
552 if (rtm->rtm_flags & RTNH_F_ONLINK)
553 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
554
555 if (rtm->rtm_flags & RTNH_F_LINKDOWN)
556 SET_FLAG(nh.flags, NEXTHOP_FLAG_LINKDOWN);
557
558 if (num_labels)
559 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
560
561 if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
562 nexthop_add_srv6_seg6local(&nh, seg6l_act, &seg6l_ctx);
563
564 if (num_segs)
565 nexthop_add_srv6_seg6(&nh, &seg6_segs);
566
567 return nh;
568 }
569
570 static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
571 struct nexthop_group *ng,
572 struct rtmsg *rtm,
573 struct rtnexthop *rtnh,
574 struct rtattr **tb,
575 void *prefsrc, vrf_id_t vrf_id)
576 {
577 void *gate = NULL;
578 struct interface *ifp = NULL;
579 int index = 0;
580 /* MPLS labels */
581 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
582 int num_labels = 0;
583 enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
584 struct seg6local_context seg6l_ctx = {};
585 struct in6_addr seg6_segs = {};
586 int num_segs = 0;
587 struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
588
589 int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
590 vrf_id_t nh_vrf_id = vrf_id;
591
592 for (;;) {
593 struct nexthop *nh = NULL;
594
595 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
596 break;
597
598 index = rtnh->rtnh_ifindex;
599 if (index) {
600 /*
601 * Yes we are looking this up
602 * for every nexthop and just
603 * using the last one looked
604 * up right now
605 */
606 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
607 index);
608 if (ifp)
609 nh_vrf_id = ifp->vrf->vrf_id;
610 else {
611 flog_warn(
612 EC_ZEBRA_UNKNOWN_INTERFACE,
613 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
614 __func__, index);
615 nh_vrf_id = VRF_DEFAULT;
616 }
617 } else
618 nh_vrf_id = vrf_id;
619
620 if (rtnh->rtnh_len > sizeof(*rtnh)) {
621 netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
622 rtnh->rtnh_len - sizeof(*rtnh));
623 if (rtnh_tb[RTA_GATEWAY])
624 gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
625 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
626 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
627 == LWTUNNEL_ENCAP_MPLS) {
628 num_labels = parse_encap_mpls(
629 rtnh_tb[RTA_ENCAP], labels);
630 }
631 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
632 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
633 == LWTUNNEL_ENCAP_SEG6_LOCAL) {
634 seg6l_act = parse_encap_seg6local(
635 rtnh_tb[RTA_ENCAP], &seg6l_ctx);
636 }
637 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
638 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
639 == LWTUNNEL_ENCAP_SEG6) {
640 num_segs = parse_encap_seg6(rtnh_tb[RTA_ENCAP],
641 &seg6_segs);
642 }
643 }
644
645 if (gate && rtm->rtm_family == AF_INET) {
646 if (index)
647 nh = nexthop_from_ipv4_ifindex(
648 gate, prefsrc, index, nh_vrf_id);
649 else
650 nh = nexthop_from_ipv4(gate, prefsrc,
651 nh_vrf_id);
652 } else if (gate && rtm->rtm_family == AF_INET6) {
653 if (index)
654 nh = nexthop_from_ipv6_ifindex(
655 gate, index, nh_vrf_id);
656 else
657 nh = nexthop_from_ipv6(gate, nh_vrf_id);
658 } else
659 nh = nexthop_from_ifindex(index, nh_vrf_id);
660
661 if (nh) {
662 nh->weight = rtnh->rtnh_hops + 1;
663
664 if (num_labels)
665 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
666 num_labels, labels);
667
668 if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
669 nexthop_add_srv6_seg6local(nh, seg6l_act,
670 &seg6l_ctx);
671
672 if (num_segs)
673 nexthop_add_srv6_seg6(nh, &seg6_segs);
674
675 if (rtnh->rtnh_flags & RTNH_F_ONLINK)
676 SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);
677
678 /* Add to temporary list */
679 nexthop_group_add_sorted(ng, nh);
680 }
681
682 if (rtnh->rtnh_len == 0)
683 break;
684
685 len -= NLMSG_ALIGN(rtnh->rtnh_len);
686 rtnh = RTNH_NEXT(rtnh);
687 }
688
689 uint8_t nhop_num = nexthop_group_nexthop_num(ng);
690
691 return nhop_num;
692 }
693
694 /* Looking up routing table by netlink interface. */
695 int netlink_route_change_read_unicast_internal(struct nlmsghdr *h,
696 ns_id_t ns_id, int startup,
697 struct zebra_dplane_ctx *ctx)
698 {
699 int len;
700 struct rtmsg *rtm;
701 struct rtattr *tb[RTA_MAX + 1];
702 uint32_t flags = 0;
703 struct prefix p;
704 struct prefix_ipv6 src_p = {};
705 vrf_id_t vrf_id;
706 bool selfroute;
707
708 char anyaddr[16] = {0};
709
710 int proto = ZEBRA_ROUTE_KERNEL;
711 int index = 0;
712 int table;
713 int metric = 0;
714 uint32_t mtu = 0;
715 uint8_t distance = 0;
716 route_tag_t tag = 0;
717 uint32_t nhe_id = 0;
718
719 void *dest = NULL;
720 void *gate = NULL;
721 void *prefsrc = NULL; /* IPv4 preferred source host address */
722 void *src = NULL; /* IPv6 srcdest source prefix */
723 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
724
725 frrtrace(3, frr_zebra, netlink_route_change_read_unicast, h, ns_id,
726 startup);
727
728 rtm = NLMSG_DATA(h);
729
730 if (startup && h->nlmsg_type != RTM_NEWROUTE)
731 return 0;
732 switch (rtm->rtm_type) {
733 case RTN_UNICAST:
734 break;
735 case RTN_BLACKHOLE:
736 bh_type = BLACKHOLE_NULL;
737 break;
738 case RTN_UNREACHABLE:
739 bh_type = BLACKHOLE_REJECT;
740 break;
741 case RTN_PROHIBIT:
742 bh_type = BLACKHOLE_ADMINPROHIB;
743 break;
744 default:
745 if (IS_ZEBRA_DEBUG_KERNEL)
746 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
747 nl_rttype_to_str(rtm->rtm_type),
748 rtm->rtm_type);
749 return 0;
750 }
751
752 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
753 if (len < 0) {
754 zlog_err(
755 "%s: Message received from netlink is of a broken size %d %zu",
756 __func__, h->nlmsg_len,
757 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
758 return -1;
759 }
760
761 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
762
763 if (rtm->rtm_flags & RTM_F_CLONED)
764 return 0;
765 if (rtm->rtm_protocol == RTPROT_REDIRECT)
766 return 0;
767 if (rtm->rtm_protocol == RTPROT_KERNEL)
768 return 0;
769
770 selfroute = is_selfroute(rtm->rtm_protocol);
771
772 if (!startup && selfroute && h->nlmsg_type == RTM_NEWROUTE &&
773 !zrouter.asic_offloaded && !ctx) {
774 if (IS_ZEBRA_DEBUG_KERNEL)
775 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
776 rtm->rtm_protocol);
777 return 0;
778 }
779
780 /* We don't care about change notifications for the MPLS table. */
781 /* TODO: Revisit this. */
782 if (rtm->rtm_family == AF_MPLS)
783 return 0;
784
785 /* Table corresponding to route. */
786 if (tb[RTA_TABLE])
787 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
788 else
789 table = rtm->rtm_table;
790
791 /* Map to VRF */
792 vrf_id = vrf_lookup_by_table(table, ns_id);
793 if (vrf_id == VRF_DEFAULT) {
794 if (!is_zebra_valid_kernel_table(table)
795 && !is_zebra_main_routing_table(table))
796 return 0;
797 }
798
799 if (rtm->rtm_flags & RTM_F_TRAP)
800 flags |= ZEBRA_FLAG_TRAPPED;
801 if (rtm->rtm_flags & RTM_F_OFFLOAD)
802 flags |= ZEBRA_FLAG_OFFLOADED;
803 if (rtm->rtm_flags & RTM_F_OFFLOAD_FAILED)
804 flags |= ZEBRA_FLAG_OFFLOAD_FAILED;
805
806 if (h->nlmsg_flags & NLM_F_APPEND)
807 flags |= ZEBRA_FLAG_OUTOFSYNC;
808
809 /* Route which inserted by Zebra. */
810 if (selfroute) {
811 flags |= ZEBRA_FLAG_SELFROUTE;
812 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
813 }
814 if (tb[RTA_OIF])
815 index = *(int *)RTA_DATA(tb[RTA_OIF]);
816
817 if (tb[RTA_DST])
818 dest = RTA_DATA(tb[RTA_DST]);
819 else
820 dest = anyaddr;
821
822 if (tb[RTA_SRC])
823 src = RTA_DATA(tb[RTA_SRC]);
824 else
825 src = anyaddr;
826
827 if (tb[RTA_PREFSRC])
828 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
829
830 if (tb[RTA_GATEWAY])
831 gate = RTA_DATA(tb[RTA_GATEWAY]);
832
833 if (tb[RTA_NH_ID])
834 nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
835
836 if (tb[RTA_PRIORITY])
837 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
838
839 #if defined(SUPPORT_REALMS)
840 if (tb[RTA_FLOW])
841 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
842 #endif
843
844 if (tb[RTA_METRICS]) {
845 struct rtattr *mxrta[RTAX_MAX + 1];
846
847 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
848 RTA_PAYLOAD(tb[RTA_METRICS]));
849
850 if (mxrta[RTAX_MTU])
851 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
852 }
853
854 if (rtm->rtm_family == AF_INET) {
855 p.family = AF_INET;
856 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
857 zlog_err(
858 "Invalid destination prefix length: %u received from kernel route change",
859 rtm->rtm_dst_len);
860 return -1;
861 }
862 memcpy(&p.u.prefix4, dest, 4);
863 p.prefixlen = rtm->rtm_dst_len;
864
865 if (rtm->rtm_src_len != 0) {
866 flog_warn(
867 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
868 "unsupported IPv4 sourcedest route (dest %pFX vrf %u)",
869 &p, vrf_id);
870 return 0;
871 }
872
873 /* Force debug below to not display anything for source */
874 src_p.prefixlen = 0;
875 } else if (rtm->rtm_family == AF_INET6) {
876 p.family = AF_INET6;
877 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
878 zlog_err(
879 "Invalid destination prefix length: %u received from kernel route change",
880 rtm->rtm_dst_len);
881 return -1;
882 }
883 memcpy(&p.u.prefix6, dest, 16);
884 p.prefixlen = rtm->rtm_dst_len;
885
886 src_p.family = AF_INET6;
887 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
888 zlog_err(
889 "Invalid source prefix length: %u received from kernel route change",
890 rtm->rtm_src_len);
891 return -1;
892 }
893 memcpy(&src_p.prefix, src, 16);
894 src_p.prefixlen = rtm->rtm_src_len;
895 } else {
896 /* We only handle the AFs we handle... */
897 if (IS_ZEBRA_DEBUG_KERNEL)
898 zlog_debug("%s: unknown address-family %u", __func__,
899 rtm->rtm_family);
900 return 0;
901 }
902
903 /*
904 * For ZEBRA_ROUTE_KERNEL types:
905 *
906 * The metric/priority of the route received from the kernel
907 * is a 32 bit number. We are going to interpret the high
908 * order byte as the Admin Distance and the low order 3 bytes
909 * as the metric.
910 *
911 * This will allow us to do two things:
912 * 1) Allow the creation of kernel routes that can be
913 * overridden by zebra.
914 * 2) Allow the old behavior for 'most' kernel route types
915 * if a user enters 'ip route ...' v4 routes get a metric
916 * of 0 and v6 routes get a metric of 1024. Both of these
917 * values will end up with a admin distance of 0, which
918 * will cause them to win for the purposes of zebra.
919 */
920 if (proto == ZEBRA_ROUTE_KERNEL) {
921 distance = (metric >> 24) & 0xFF;
922 metric = (metric & 0x00FFFFFF);
923 }
924
925 if (IS_ZEBRA_DEBUG_KERNEL) {
926 char buf2[PREFIX_STRLEN];
927
928 zlog_debug(
929 "%s %pFX%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d",
930 nl_msg_type_to_str(h->nlmsg_type), &p,
931 src_p.prefixlen ? " from " : "",
932 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
933 : "",
934 vrf_id_to_name(vrf_id), vrf_id, table, metric,
935 distance);
936 }
937
938 afi_t afi = AFI_IP;
939 if (rtm->rtm_family == AF_INET6)
940 afi = AFI_IP6;
941
942 if (h->nlmsg_type == RTM_NEWROUTE) {
943 struct route_entry *re;
944 struct nexthop_group *ng = NULL;
945
946 re = zebra_rib_route_entry_new(vrf_id, proto, 0, flags, nhe_id,
947 table, metric, mtu, distance,
948 tag);
949 if (!nhe_id)
950 ng = nexthop_group_new();
951
952 if (!tb[RTA_MULTIPATH]) {
953 struct nexthop *nexthop, nh;
954
955 if (!nhe_id) {
956 nh = parse_nexthop_unicast(
957 ns_id, rtm, tb, bh_type, index, prefsrc,
958 gate, afi, vrf_id);
959
960 nexthop = nexthop_new();
961 *nexthop = nh;
962 nexthop_group_add_sorted(ng, nexthop);
963 }
964 } else {
965 /* This is a multipath route */
966 struct rtnexthop *rtnh =
967 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
968
969 if (!nhe_id) {
970 uint8_t nhop_num;
971
972 /* Use temporary list of nexthops; parse
973 * message payload's nexthops.
974 */
975 nhop_num =
976 parse_multipath_nexthops_unicast(
977 ns_id, ng, rtm, rtnh, tb,
978 prefsrc, vrf_id);
979
980 zserv_nexthop_num_warn(
981 __func__, (const struct prefix *)&p,
982 nhop_num);
983
984 if (nhop_num == 0) {
985 nexthop_group_delete(&ng);
986 ng = NULL;
987 }
988 }
989 }
990 if (nhe_id || ng) {
991 dplane_rib_add_multipath(afi, SAFI_UNICAST, &p, &src_p,
992 re, ng, startup, ctx);
993 if (ng)
994 nexthop_group_delete(&ng);
995 } else {
996 /*
997 * I really don't see how this is possible
998 * but since we are testing for it let's
999 * let the end user know why the route
1000 * that was just received was swallowed
1001 * up and forgotten
1002 */
1003 zlog_err(
1004 "%s: %pFX multipath RTM_NEWROUTE has a invalid nexthop group from the kernel",
1005 __func__, &p);
1006 XFREE(MTYPE_RE, re);
1007 }
1008 } else {
1009 if (ctx) {
1010 zlog_err(
1011 "%s: %pFX RTM_DELROUTE received but received a context as well",
1012 __func__, &p);
1013 return 0;
1014 }
1015
1016 if (nhe_id) {
1017 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
1018 &p, &src_p, NULL, nhe_id, table, metric,
1019 distance, true);
1020 } else {
1021 if (!tb[RTA_MULTIPATH]) {
1022 struct nexthop nh;
1023
1024 nh = parse_nexthop_unicast(
1025 ns_id, rtm, tb, bh_type, index, prefsrc,
1026 gate, afi, vrf_id);
1027 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
1028 flags, &p, &src_p, &nh, 0, table,
1029 metric, distance, true);
1030 } else {
1031 /* XXX: need to compare the entire list of
1032 * nexthops here for NLM_F_APPEND stupidity */
1033 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
1034 flags, &p, &src_p, NULL, 0, table,
1035 metric, distance, true);
1036 }
1037 }
1038 }
1039
1040 return 1;
1041 }
1042
1043 static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
1044 int startup)
1045 {
1046 return netlink_route_change_read_unicast_internal(h, ns_id, startup,
1047 NULL);
1048 }
1049
1050 static struct mcast_route_data *mroute = NULL;
1051
1052 static int netlink_route_change_read_multicast(struct nlmsghdr *h,
1053 ns_id_t ns_id, int startup)
1054 {
1055 int len;
1056 struct rtmsg *rtm;
1057 struct rtattr *tb[RTA_MAX + 1];
1058 struct mcast_route_data *m;
1059 int iif = 0;
1060 int count;
1061 int oif[256];
1062 int oif_count = 0;
1063 char oif_list[256] = "\0";
1064 vrf_id_t vrf;
1065 int table;
1066
1067 assert(mroute);
1068 m = mroute;
1069
1070 rtm = NLMSG_DATA(h);
1071
1072 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
1073
1074 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
1075
1076 if (tb[RTA_TABLE])
1077 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
1078 else
1079 table = rtm->rtm_table;
1080
1081 vrf = vrf_lookup_by_table(table, ns_id);
1082
1083 if (tb[RTA_IIF])
1084 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
1085
1086 if (tb[RTA_SRC]) {
1087 if (rtm->rtm_family == RTNL_FAMILY_IPMR)
1088 m->src.ipaddr_v4 =
1089 *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
1090 else
1091 m->src.ipaddr_v6 =
1092 *(struct in6_addr *)RTA_DATA(tb[RTA_SRC]);
1093 }
1094
1095 if (tb[RTA_DST]) {
1096 if (rtm->rtm_family == RTNL_FAMILY_IPMR)
1097 m->grp.ipaddr_v4 =
1098 *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
1099 else
1100 m->grp.ipaddr_v6 =
1101 *(struct in6_addr *)RTA_DATA(tb[RTA_DST]);
1102 }
1103
1104 if (tb[RTA_EXPIRES])
1105 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
1106
1107 if (tb[RTA_MULTIPATH]) {
1108 struct rtnexthop *rtnh =
1109 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
1110
1111 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
1112 for (;;) {
1113 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
1114 break;
1115
1116 oif[oif_count] = rtnh->rtnh_ifindex;
1117 oif_count++;
1118
1119 if (rtnh->rtnh_len == 0)
1120 break;
1121
1122 len -= NLMSG_ALIGN(rtnh->rtnh_len);
1123 rtnh = RTNH_NEXT(rtnh);
1124 }
1125 }
1126
1127 if (rtm->rtm_family == RTNL_FAMILY_IPMR) {
1128 SET_IPADDR_V4(&m->src);
1129 SET_IPADDR_V4(&m->grp);
1130 } else if (rtm->rtm_family == RTNL_FAMILY_IP6MR) {
1131 SET_IPADDR_V6(&m->src);
1132 SET_IPADDR_V6(&m->grp);
1133 } else {
1134 zlog_warn("%s: Invalid rtm_family received", __func__);
1135 return 0;
1136 }
1137
1138 if (IS_ZEBRA_DEBUG_KERNEL) {
1139 struct interface *ifp = NULL;
1140 struct zebra_vrf *zvrf = NULL;
1141
1142 for (count = 0; count < oif_count; count++) {
1143 ifp = if_lookup_by_index(oif[count], vrf);
1144 char temp[256];
1145
1146 snprintf(temp, sizeof(temp), "%s(%d) ",
1147 ifp ? ifp->name : "Unknown", oif[count]);
1148 strlcat(oif_list, temp, sizeof(oif_list));
1149 }
1150 zvrf = zebra_vrf_lookup_by_id(vrf);
1151 ifp = if_lookup_by_index(iif, vrf);
1152 zlog_debug(
1153 "MCAST VRF: %s(%d) %s (%pIA,%pIA) IIF: %s(%d) OIF: %s jiffies: %lld",
1154 zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type),
1155 &m->src, &m->grp, ifp ? ifp->name : "Unknown", iif,
1156 oif_list, m->lastused);
1157 }
1158 return 0;
1159 }
1160
1161 int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
1162 {
1163 int len;
1164 struct rtmsg *rtm;
1165
1166 rtm = NLMSG_DATA(h);
1167
1168 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
1169 /* If this is not route add/delete message print warning. */
1170 zlog_debug("Kernel message: %s NS %u",
1171 nl_msg_type_to_str(h->nlmsg_type), ns_id);
1172 return 0;
1173 }
1174
1175 switch (rtm->rtm_family) {
1176 case AF_INET:
1177 case AF_INET6:
1178 break;
1179
1180 case RTNL_FAMILY_IPMR:
1181 case RTNL_FAMILY_IP6MR:
1182 /* notifications on IPMR are irrelevant to zebra, we only care
1183 * about responses to RTM_GETROUTE requests we sent.
1184 */
1185 return 0;
1186
1187 default:
1188 flog_warn(
1189 EC_ZEBRA_UNKNOWN_FAMILY,
1190 "Invalid address family: %u received from kernel route change: %s",
1191 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
1192 return 0;
1193 }
1194
1195 /* Connected route. */
1196 if (IS_ZEBRA_DEBUG_KERNEL)
1197 zlog_debug("%s %s %s proto %s NS %u",
1198 nl_msg_type_to_str(h->nlmsg_type),
1199 nl_family_to_str(rtm->rtm_family),
1200 nl_rttype_to_str(rtm->rtm_type),
1201 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
1202
1203
1204 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
1205 if (len < 0) {
1206 zlog_err(
1207 "%s: Message received from netlink is of a broken size: %d %zu",
1208 __func__, h->nlmsg_len,
1209 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
1210 return -1;
1211 }
1212
1213 /* these are "magic" kernel-managed *unicast* routes used for
1214 * outputting locally generated multicast traffic (which uses unicast
1215 * handling on Linux because ~reasons~.
1216 */
1217 if (rtm->rtm_type == RTN_MULTICAST)
1218 return 0;
1219
1220 netlink_route_change_read_unicast(h, ns_id, startup);
1221 return 0;
1222 }
1223
1224 /* Request for specific route information from the kernel */
1225 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
1226 {
1227 struct {
1228 struct nlmsghdr n;
1229 struct rtmsg rtm;
1230 } req;
1231
1232 /* Form the request, specifying filter (rtattr) if needed. */
1233 memset(&req, 0, sizeof(req));
1234 req.n.nlmsg_type = type;
1235 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
1236 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1237 req.rtm.rtm_family = family;
1238
1239 return netlink_request(&zns->netlink_cmd, &req);
1240 }
1241
1242 /* Routing table read function using netlink interface. Only called
1243 bootstrap time. */
1244 int netlink_route_read(struct zebra_ns *zns)
1245 {
1246 int ret;
1247 struct zebra_dplane_info dp_info;
1248
1249 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
1250
1251 /* Get IPv4 routing table. */
1252 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
1253 if (ret < 0)
1254 return ret;
1255 ret = netlink_parse_info(netlink_route_change_read_unicast,
1256 &zns->netlink_cmd, &dp_info, 0, true);
1257 if (ret < 0)
1258 return ret;
1259
1260 /* Get IPv6 routing table. */
1261 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
1262 if (ret < 0)
1263 return ret;
1264 ret = netlink_parse_info(netlink_route_change_read_unicast,
1265 &zns->netlink_cmd, &dp_info, 0, true);
1266 if (ret < 0)
1267 return ret;
1268
1269 return 0;
1270 }
1271
1272 /*
1273 * The function returns true if the gateway info could be added
1274 * to the message, otherwise false is returned.
1275 */
1276 static bool _netlink_route_add_gateway_info(uint8_t route_family,
1277 uint8_t gw_family,
1278 struct nlmsghdr *nlmsg,
1279 size_t req_size, int bytelen,
1280 const struct nexthop *nexthop)
1281 {
1282 if (route_family == AF_MPLS) {
1283 struct gw_family_t gw_fam;
1284
1285 gw_fam.family = gw_family;
1286 if (gw_family == AF_INET)
1287 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1288 else
1289 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1290 if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family,
1291 bytelen + 2))
1292 return false;
1293 } else {
1294 if (!(nexthop->rparent
1295 && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) {
1296 if (gw_family == AF_INET) {
1297 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1298 &nexthop->gate.ipv4, bytelen))
1299 return false;
1300 } else {
1301 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1302 &nexthop->gate.ipv6, bytelen))
1303 return false;
1304 }
1305 }
1306 }
1307
1308 return true;
1309 }
1310
1311 static int build_label_stack(struct mpls_label_stack *nh_label,
1312 mpls_lse_t *out_lse, char *label_buf,
1313 size_t label_buf_size)
1314 {
1315 char label_buf1[20];
1316 int num_labels = 0;
1317
1318 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1319 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1320 continue;
1321
1322 if (IS_ZEBRA_DEBUG_KERNEL) {
1323 if (!num_labels)
1324 snprintf(label_buf, label_buf_size, "label %u",
1325 nh_label->label[i]);
1326 else {
1327 snprintf(label_buf1, sizeof(label_buf1), "/%u",
1328 nh_label->label[i]);
1329 strlcat(label_buf, label_buf1, label_buf_size);
1330 }
1331 }
1332
1333 out_lse[num_labels] =
1334 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1335 num_labels++;
1336 }
1337
1338 return num_labels;
1339 }
1340
1341 static bool _netlink_route_encode_label_info(struct mpls_label_stack *nh_label,
1342 struct nlmsghdr *nlmsg,
1343 size_t buflen, struct rtmsg *rtmsg,
1344 char *label_buf,
1345 size_t label_buf_size)
1346 {
1347 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1348 int num_labels;
1349
1350 /*
1351 * label_buf is *only* currently used within debugging.
1352 * As such when we assign it we are guarding it inside
1353 * a debug test. If you want to change this make sure
1354 * you fix this assumption
1355 */
1356 label_buf[0] = '\0';
1357
1358 num_labels =
1359 build_label_stack(nh_label, out_lse, label_buf, label_buf_size);
1360
1361 if (num_labels) {
1362 /* Set the BoS bit */
1363 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1364
1365 if (rtmsg->rtm_family == AF_MPLS) {
1366 if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse,
1367 num_labels * sizeof(mpls_lse_t)))
1368 return false;
1369 } else {
1370 struct rtattr *nest;
1371
1372 if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
1373 LWTUNNEL_ENCAP_MPLS))
1374 return false;
1375
1376 nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
1377 if (!nest)
1378 return false;
1379
1380 if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST,
1381 &out_lse,
1382 num_labels * sizeof(mpls_lse_t)))
1383 return false;
1384 nl_attr_nest_end(nlmsg, nest);
1385 }
1386 }
1387
1388 return true;
1389 }
1390
1391 static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop,
1392 int family,
1393 struct nlmsghdr *nlmsg,
1394 size_t buflen, int bytelen)
1395 {
1396 if (family == AF_INET) {
1397 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1398 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1399 &nexthop->rmap_src.ipv4, bytelen))
1400 return false;
1401 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1402 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1403 &nexthop->src.ipv4, bytelen))
1404 return false;
1405 }
1406 } else if (family == AF_INET6) {
1407 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1408 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1409 &nexthop->rmap_src.ipv6, bytelen))
1410 return false;
1411 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1412 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1413 &nexthop->src.ipv6, bytelen))
1414 return false;
1415 }
1416 }
1417
1418 return true;
1419 }
1420
1421 static ssize_t fill_seg6ipt_encap(char *buffer, size_t buflen,
1422 const struct in6_addr *seg)
1423 {
1424 struct seg6_iptunnel_encap *ipt;
1425 struct ipv6_sr_hdr *srh;
1426 const size_t srhlen = 24;
1427
1428 /*
1429 * Caution: Support only SINGLE-SID, not MULTI-SID
1430 * This function only supports the case where segs represents
1431 * a single SID. If you want to extend the SRv6 functionality,
1432 * you should improve the Boundary Check.
1433 * Ex. In case of set a SID-List include multiple-SIDs as an
1434 * argument of the Transit Behavior, we must support variable
1435 * boundary check for buflen.
1436 */
1437 if (buflen < (sizeof(struct seg6_iptunnel_encap) +
1438 sizeof(struct ipv6_sr_hdr) + 16))
1439 return -1;
1440
1441 memset(buffer, 0, buflen);
1442
1443 ipt = (struct seg6_iptunnel_encap *)buffer;
1444 ipt->mode = SEG6_IPTUN_MODE_ENCAP;
1445 srh = ipt->srh;
1446 srh->hdrlen = (srhlen >> 3) - 1;
1447 srh->type = 4;
1448 srh->segments_left = 0;
1449 srh->first_segment = 0;
1450 memcpy(&srh->segments[0], seg, sizeof(struct in6_addr));
1451
1452 return srhlen + 4;
1453 }
1454
1455 /* This function takes a nexthop as argument and adds
1456 * the appropriate netlink attributes to an existing
1457 * netlink message.
1458 *
1459 * @param routedesc: Human readable description of route type
1460 * (direct/recursive, single-/multipath)
1461 * @param bytelen: Length of addresses in bytes.
1462 * @param nexthop: Nexthop information
1463 * @param nlmsg: nlmsghdr structure to fill in.
1464 * @param req_size: The size allocated for the message.
1465 *
1466 * The function returns true if the nexthop could be added
1467 * to the message, otherwise false is returned.
1468 */
1469 static bool _netlink_route_build_singlepath(const struct prefix *p,
1470 const char *routedesc, int bytelen,
1471 const struct nexthop *nexthop,
1472 struct nlmsghdr *nlmsg,
1473 struct rtmsg *rtmsg,
1474 size_t req_size, int cmd)
1475 {
1476
1477 char label_buf[256];
1478 struct vrf *vrf;
1479 char addrstr[INET6_ADDRSTRLEN];
1480
1481 assert(nexthop);
1482
1483 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1484
1485 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1486 req_size, rtmsg, label_buf,
1487 sizeof(label_buf)))
1488 return false;
1489
1490 if (nexthop->nh_srv6) {
1491 if (nexthop->nh_srv6->seg6local_action !=
1492 ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
1493 struct rtattr *nest;
1494 const struct seg6local_context *ctx;
1495
1496 ctx = &nexthop->nh_srv6->seg6local_ctx;
1497 if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1498 LWTUNNEL_ENCAP_SEG6_LOCAL))
1499 return false;
1500
1501 nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1502 if (!nest)
1503 return false;
1504
1505 switch (nexthop->nh_srv6->seg6local_action) {
1506 case ZEBRA_SEG6_LOCAL_ACTION_END:
1507 if (!nl_attr_put32(nlmsg, req_size,
1508 SEG6_LOCAL_ACTION,
1509 SEG6_LOCAL_ACTION_END))
1510 return false;
1511 break;
1512 case ZEBRA_SEG6_LOCAL_ACTION_END_X:
1513 if (!nl_attr_put32(nlmsg, req_size,
1514 SEG6_LOCAL_ACTION,
1515 SEG6_LOCAL_ACTION_END_X))
1516 return false;
1517 if (!nl_attr_put(nlmsg, req_size,
1518 SEG6_LOCAL_NH6, &ctx->nh6,
1519 sizeof(struct in6_addr)))
1520 return false;
1521 break;
1522 case ZEBRA_SEG6_LOCAL_ACTION_END_T:
1523 if (!nl_attr_put32(nlmsg, req_size,
1524 SEG6_LOCAL_ACTION,
1525 SEG6_LOCAL_ACTION_END_T))
1526 return false;
1527 if (!nl_attr_put32(nlmsg, req_size,
1528 SEG6_LOCAL_TABLE,
1529 ctx->table))
1530 return false;
1531 break;
1532 case ZEBRA_SEG6_LOCAL_ACTION_END_DX4:
1533 if (!nl_attr_put32(nlmsg, req_size,
1534 SEG6_LOCAL_ACTION,
1535 SEG6_LOCAL_ACTION_END_DX4))
1536 return false;
1537 if (!nl_attr_put(nlmsg, req_size,
1538 SEG6_LOCAL_NH4, &ctx->nh4,
1539 sizeof(struct in_addr)))
1540 return false;
1541 break;
1542 case ZEBRA_SEG6_LOCAL_ACTION_END_DT6:
1543 if (!nl_attr_put32(nlmsg, req_size,
1544 SEG6_LOCAL_ACTION,
1545 SEG6_LOCAL_ACTION_END_DT6))
1546 return false;
1547 if (!nl_attr_put32(nlmsg, req_size,
1548 SEG6_LOCAL_TABLE,
1549 ctx->table))
1550 return false;
1551 break;
1552 case ZEBRA_SEG6_LOCAL_ACTION_END_DT4:
1553 if (!nl_attr_put32(nlmsg, req_size,
1554 SEG6_LOCAL_ACTION,
1555 SEG6_LOCAL_ACTION_END_DT4))
1556 return false;
1557 if (!nl_attr_put32(nlmsg, req_size,
1558 SEG6_LOCAL_VRFTABLE,
1559 ctx->table))
1560 return false;
1561 break;
1562 case ZEBRA_SEG6_LOCAL_ACTION_END_DT46:
1563 if (!nl_attr_put32(nlmsg, req_size,
1564 SEG6_LOCAL_ACTION,
1565 SEG6_LOCAL_ACTION_END_DT46))
1566 return false;
1567 if (!nl_attr_put32(nlmsg, req_size,
1568 SEG6_LOCAL_VRFTABLE,
1569 ctx->table))
1570 return false;
1571 break;
1572 default:
1573 zlog_err("%s: unsupport seg6local behaviour action=%u",
1574 __func__,
1575 nexthop->nh_srv6->seg6local_action);
1576 return false;
1577 }
1578 nl_attr_nest_end(nlmsg, nest);
1579 }
1580
1581 if (!sid_zero(&nexthop->nh_srv6->seg6_segs)) {
1582 char tun_buf[4096];
1583 ssize_t tun_len;
1584 struct rtattr *nest;
1585
1586 if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1587 LWTUNNEL_ENCAP_SEG6))
1588 return false;
1589 nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1590 if (!nest)
1591 return false;
1592 tun_len = fill_seg6ipt_encap(tun_buf, sizeof(tun_buf),
1593 &nexthop->nh_srv6->seg6_segs);
1594 if (tun_len < 0)
1595 return false;
1596 if (!nl_attr_put(nlmsg, req_size, SEG6_IPTUNNEL_SRH,
1597 tun_buf, tun_len))
1598 return false;
1599 nl_attr_nest_end(nlmsg, nest);
1600 }
1601 }
1602
1603 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1604 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1605
1606 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1607 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1608 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1609 return false;
1610 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1611 return false;
1612
1613 if (cmd == RTM_NEWROUTE) {
1614 if (!_netlink_route_encode_nexthop_src(
1615 nexthop, AF_INET, nlmsg, req_size, bytelen))
1616 return false;
1617 }
1618
1619 if (IS_ZEBRA_DEBUG_KERNEL)
1620 zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1621 __func__, routedesc, p, ipv4_ll_buf,
1622 label_buf, nexthop->ifindex,
1623 VRF_LOGNAME(vrf), nexthop->vrf_id);
1624 return true;
1625 }
1626
1627 if (nexthop->type == NEXTHOP_TYPE_IPV4
1628 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1629 /* Send deletes to the kernel without specifying the next-hop */
1630 if (cmd != RTM_DELROUTE) {
1631 if (!_netlink_route_add_gateway_info(
1632 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1633 bytelen, nexthop))
1634 return false;
1635 }
1636
1637 if (cmd == RTM_NEWROUTE) {
1638 if (!_netlink_route_encode_nexthop_src(
1639 nexthop, AF_INET, nlmsg, req_size, bytelen))
1640 return false;
1641 }
1642
1643 if (IS_ZEBRA_DEBUG_KERNEL) {
1644 inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr,
1645 sizeof(addrstr));
1646 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1647 __func__, routedesc, p, addrstr, label_buf,
1648 nexthop->ifindex, VRF_LOGNAME(vrf),
1649 nexthop->vrf_id);
1650 }
1651 }
1652
1653 if (nexthop->type == NEXTHOP_TYPE_IPV6
1654 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1655 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1656 AF_INET6, nlmsg, req_size,
1657 bytelen, nexthop))
1658 return false;
1659
1660 if (cmd == RTM_NEWROUTE) {
1661 if (!_netlink_route_encode_nexthop_src(
1662 nexthop, AF_INET6, nlmsg, req_size,
1663 bytelen))
1664 return false;
1665 }
1666
1667 if (IS_ZEBRA_DEBUG_KERNEL) {
1668 inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr,
1669 sizeof(addrstr));
1670 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1671 __func__, routedesc, p, addrstr, label_buf,
1672 nexthop->ifindex, VRF_LOGNAME(vrf),
1673 nexthop->vrf_id);
1674 }
1675 }
1676
1677 /*
1678 * We have the ifindex so we should always send it
1679 * This is especially useful if we are doing route
1680 * leaking.
1681 */
1682 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
1683 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1684 return false;
1685 }
1686
1687 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1688 if (cmd == RTM_NEWROUTE) {
1689 if (!_netlink_route_encode_nexthop_src(
1690 nexthop, AF_INET, nlmsg, req_size, bytelen))
1691 return false;
1692 }
1693
1694 if (IS_ZEBRA_DEBUG_KERNEL)
1695 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1696 __func__, routedesc, p, nexthop->ifindex,
1697 VRF_LOGNAME(vrf), nexthop->vrf_id);
1698 }
1699
1700 return true;
1701 }
1702
1703 /* This function appends tag value as rtnl flow attribute
1704 * to the given netlink msg only if value is less than 256.
1705 * Used only if SUPPORT_REALMS enabled.
1706 *
1707 * @param nlmsg: nlmsghdr structure to fill in.
1708 * @param maxlen: The size allocated for the message.
1709 * @param tag: The route tag.
1710 *
1711 * The function returns true if the flow attribute could
1712 * be added to the message, otherwise false is returned.
1713 */
1714 static inline bool _netlink_set_tag(struct nlmsghdr *n, unsigned int maxlen,
1715 route_tag_t tag)
1716 {
1717 if (tag > 0 && tag <= 255) {
1718 if (!nl_attr_put32(n, maxlen, RTA_FLOW, tag))
1719 return false;
1720 }
1721 return true;
1722 }
1723
1724 /* This function takes a nexthop as argument and
1725 * appends to the given netlink msg. If the nexthop
1726 * defines a preferred source, the src parameter
1727 * will be modified to point to that src, otherwise
1728 * it will be kept unmodified.
1729 *
1730 * @param routedesc: Human readable description of route type
1731 * (direct/recursive, single-/multipath)
1732 * @param bytelen: Length of addresses in bytes.
1733 * @param nexthop: Nexthop information
1734 * @param nlmsg: nlmsghdr structure to fill in.
1735 * @param req_size: The size allocated for the message.
1736 * @param src: pointer pointing to a location where
1737 * the prefsrc should be stored.
1738 *
1739 * The function returns true if the nexthop could be added
1740 * to the message, otherwise false is returned.
1741 */
1742 static bool _netlink_route_build_multipath(
1743 const struct prefix *p, const char *routedesc, int bytelen,
1744 const struct nexthop *nexthop, struct nlmsghdr *nlmsg, size_t req_size,
1745 struct rtmsg *rtmsg, const union g_addr **src, route_tag_t tag)
1746 {
1747 char label_buf[256];
1748 struct vrf *vrf;
1749 struct rtnexthop *rtnh;
1750
1751 rtnh = nl_attr_rtnh(nlmsg, req_size);
1752 if (rtnh == NULL)
1753 return false;
1754
1755 assert(nexthop);
1756
1757 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1758
1759 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1760 req_size, rtmsg, label_buf,
1761 sizeof(label_buf)))
1762 return false;
1763
1764 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1765 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1766
1767 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1768 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1769 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1770 return false;
1771 rtnh->rtnh_ifindex = nexthop->ifindex;
1772 if (nexthop->weight)
1773 rtnh->rtnh_hops = nexthop->weight - 1;
1774
1775 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1776 *src = &nexthop->rmap_src;
1777 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1778 *src = &nexthop->src;
1779
1780 if (IS_ZEBRA_DEBUG_KERNEL)
1781 zlog_debug(
1782 "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1783 __func__, routedesc, p, ipv4_ll_buf, label_buf,
1784 nexthop->ifindex, VRF_LOGNAME(vrf),
1785 nexthop->vrf_id);
1786 nl_attr_rtnh_end(nlmsg, rtnh);
1787 return true;
1788 }
1789
1790 if (nexthop->type == NEXTHOP_TYPE_IPV4
1791 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1792 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET,
1793 nlmsg, req_size, bytelen,
1794 nexthop))
1795 return false;
1796
1797 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1798 *src = &nexthop->rmap_src;
1799 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1800 *src = &nexthop->src;
1801
1802 if (IS_ZEBRA_DEBUG_KERNEL)
1803 zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)",
1804 __func__, routedesc, p, &nexthop->gate.ipv4,
1805 label_buf, nexthop->ifindex,
1806 VRF_LOGNAME(vrf), nexthop->vrf_id);
1807 }
1808 if (nexthop->type == NEXTHOP_TYPE_IPV6
1809 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1810 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1811 AF_INET6, nlmsg, req_size,
1812 bytelen, nexthop))
1813 return false;
1814
1815 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1816 *src = &nexthop->rmap_src;
1817 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1818 *src = &nexthop->src;
1819
1820 if (IS_ZEBRA_DEBUG_KERNEL)
1821 zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)",
1822 __func__, routedesc, p, &nexthop->gate.ipv6,
1823 label_buf, nexthop->ifindex,
1824 VRF_LOGNAME(vrf), nexthop->vrf_id);
1825 }
1826
1827 /*
1828 * We have figured out the ifindex so we should always send it
1829 * This is especially useful if we are doing route
1830 * leaking.
1831 */
1832 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1833 rtnh->rtnh_ifindex = nexthop->ifindex;
1834
1835 /* ifindex */
1836 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1837 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1838 *src = &nexthop->rmap_src;
1839 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1840 *src = &nexthop->src;
1841
1842 if (IS_ZEBRA_DEBUG_KERNEL)
1843 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1844 __func__, routedesc, p, nexthop->ifindex,
1845 VRF_LOGNAME(vrf), nexthop->vrf_id);
1846 }
1847
1848 if (nexthop->weight)
1849 rtnh->rtnh_hops = nexthop->weight - 1;
1850
1851 if (!_netlink_set_tag(nlmsg, req_size, tag))
1852 return false;
1853
1854 nl_attr_rtnh_end(nlmsg, rtnh);
1855 return true;
1856 }
1857
1858 static inline bool
1859 _netlink_mpls_build_singlepath(const struct prefix *p, const char *routedesc,
1860 const struct zebra_nhlfe *nhlfe,
1861 struct nlmsghdr *nlmsg, struct rtmsg *rtmsg,
1862 size_t req_size, int cmd)
1863 {
1864 int bytelen;
1865 uint8_t family;
1866
1867 family = NHLFE_FAMILY(nhlfe);
1868 bytelen = (family == AF_INET ? 4 : 16);
1869 return _netlink_route_build_singlepath(p, routedesc, bytelen,
1870 nhlfe->nexthop, nlmsg, rtmsg,
1871 req_size, cmd);
1872 }
1873
1874
1875 static inline bool
1876 _netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc,
1877 const struct zebra_nhlfe *nhlfe,
1878 struct nlmsghdr *nlmsg, size_t req_size,
1879 struct rtmsg *rtmsg, const union g_addr **src)
1880 {
1881 int bytelen;
1882 uint8_t family;
1883
1884 family = NHLFE_FAMILY(nhlfe);
1885 bytelen = (family == AF_INET ? 4 : 16);
1886 return _netlink_route_build_multipath(p, routedesc, bytelen,
1887 nhlfe->nexthop, nlmsg, req_size,
1888 rtmsg, src, 0);
1889 }
1890
1891 static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
1892 {
1893 if (IS_ZEBRA_DEBUG_KERNEL)
1894 zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20",
1895 routedesc, nl_msg_type_to_str(cmd), label);
1896 }
1897
1898 static int netlink_neigh_update(int cmd, int ifindex, void *addr, char *lla,
1899 int llalen, ns_id_t ns_id, uint8_t family,
1900 bool permanent, uint8_t protocol)
1901 {
1902 struct {
1903 struct nlmsghdr n;
1904 struct ndmsg ndm;
1905 char buf[256];
1906 } req;
1907
1908 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
1909
1910 memset(&req, 0, sizeof(req));
1911
1912 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1913 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1914 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1915 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1916
1917 req.ndm.ndm_family = family;
1918 req.ndm.ndm_ifindex = ifindex;
1919 req.ndm.ndm_type = RTN_UNICAST;
1920 if (cmd == RTM_NEWNEIGH) {
1921 if (!permanent)
1922 req.ndm.ndm_state = NUD_REACHABLE;
1923 else
1924 req.ndm.ndm_state = NUD_PERMANENT;
1925 } else
1926 req.ndm.ndm_state = NUD_FAILED;
1927
1928 nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol,
1929 sizeof(protocol));
1930 req.ndm.ndm_type = RTN_UNICAST;
1931 nl_attr_put(&req.n, sizeof(req), NDA_DST, addr,
1932 family2addrsize(family));
1933 if (lla)
1934 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1935
1936 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1937 false);
1938 }
1939
1940 static bool nexthop_set_src(const struct nexthop *nexthop, int family,
1941 union g_addr *src)
1942 {
1943 if (family == AF_INET) {
1944 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1945 src->ipv4 = nexthop->rmap_src.ipv4;
1946 return true;
1947 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1948 src->ipv4 = nexthop->src.ipv4;
1949 return true;
1950 }
1951 } else if (family == AF_INET6) {
1952 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1953 src->ipv6 = nexthop->rmap_src.ipv6;
1954 return true;
1955 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1956 src->ipv6 = nexthop->src.ipv6;
1957 return true;
1958 }
1959 }
1960
1961 return false;
1962 }
1963
1964 /*
1965 * The function returns true if the attribute could be added
1966 * to the message, otherwise false is returned.
1967 */
1968 static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen,
1969 struct nexthop *nh)
1970 {
1971 struct rtattr *nest;
1972
1973 switch (nh->nh_encap_type) {
1974 case NET_VXLAN:
1975 if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type))
1976 return false;
1977
1978 nest = nl_attr_nest(n, nlen, RTA_ENCAP);
1979 if (!nest)
1980 return false;
1981
1982 if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */,
1983 nh->nh_encap.vni))
1984 return false;
1985 nl_attr_nest_end(n, nest);
1986 break;
1987 }
1988
1989 return true;
1990 }
1991
1992 /*
1993 * Routing table change via netlink interface, using a dataplane context object
1994 *
1995 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
1996 * otherwise the number of bytes written to buf.
1997 */
1998 ssize_t netlink_route_multipath_msg_encode(int cmd,
1999 struct zebra_dplane_ctx *ctx,
2000 uint8_t *data, size_t datalen,
2001 bool fpm, bool force_nhg)
2002 {
2003 int bytelen;
2004 struct nexthop *nexthop = NULL;
2005 unsigned int nexthop_num;
2006 const char *routedesc;
2007 bool setsrc = false;
2008 union g_addr src;
2009 const struct prefix *p, *src_p;
2010 uint32_t table_id;
2011 struct nlsock *nl;
2012 route_tag_t tag = 0;
2013
2014 struct {
2015 struct nlmsghdr n;
2016 struct rtmsg r;
2017 char buf[];
2018 } *req = (void *)data;
2019
2020 p = dplane_ctx_get_dest(ctx);
2021 src_p = dplane_ctx_get_src(ctx);
2022
2023 if (datalen < sizeof(*req))
2024 return 0;
2025
2026 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
2027
2028 memset(req, 0, sizeof(*req));
2029
2030 bytelen = (p->family == AF_INET ? 4 : 16);
2031
2032 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2033 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2034
2035 if ((cmd == RTM_NEWROUTE) &&
2036 ((p->family == AF_INET) || v6_rr_semantics))
2037 req->n.nlmsg_flags |= NLM_F_REPLACE;
2038
2039 req->n.nlmsg_type = cmd;
2040
2041 req->n.nlmsg_pid = nl->snl.nl_pid;
2042
2043 req->r.rtm_family = p->family;
2044 req->r.rtm_dst_len = p->prefixlen;
2045 req->r.rtm_src_len = src_p ? src_p->prefixlen : 0;
2046 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
2047
2048 if (cmd == RTM_DELROUTE)
2049 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
2050 else
2051 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
2052
2053 /*
2054 * blackhole routes are not RTN_UNICAST, they are
2055 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
2056 * so setting this value as a RTN_UNICAST would
2057 * cause the route lookup of just the prefix
2058 * to fail. So no need to specify this for
2059 * the RTM_DELROUTE case
2060 */
2061 if (cmd != RTM_DELROUTE)
2062 req->r.rtm_type = RTN_UNICAST;
2063
2064 if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen))
2065 return 0;
2066 if (src_p) {
2067 if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix,
2068 bytelen))
2069 return 0;
2070 }
2071
2072 /* Metric. */
2073 /* Hardcode the metric for all routes coming from zebra. Metric isn't
2074 * used
2075 * either by the kernel or by zebra. Its purely for calculating best
2076 * path(s)
2077 * by the routing protocol and for communicating with protocol peers.
2078 */
2079 if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY,
2080 ROUTE_INSTALLATION_METRIC))
2081 return 0;
2082
2083 #if defined(SUPPORT_REALMS)
2084 if (cmd == RTM_DELROUTE)
2085 tag = dplane_ctx_get_old_tag(ctx);
2086 else
2087 tag = dplane_ctx_get_tag(ctx);
2088 #endif
2089
2090 /* Table corresponding to this route. */
2091 table_id = dplane_ctx_get_table(ctx);
2092 if (table_id < 256)
2093 req->r.rtm_table = table_id;
2094 else {
2095 req->r.rtm_table = RT_TABLE_UNSPEC;
2096 if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id))
2097 return 0;
2098 }
2099
2100 if (IS_ZEBRA_DEBUG_KERNEL)
2101 zlog_debug(
2102 "%s: %s %pFX vrf %u(%u)", __func__,
2103 nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx),
2104 table_id);
2105
2106 /*
2107 * If we are not updating the route and we have received
2108 * a route delete, then all we need to fill in is the
2109 * prefix information to tell the kernel to schwack
2110 * it.
2111 */
2112 if (cmd == RTM_DELROUTE) {
2113 if (!_netlink_set_tag(&req->n, datalen, tag))
2114 return 0;
2115 return NLMSG_ALIGN(req->n.nlmsg_len);
2116 }
2117
2118 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
2119 struct rtattr *nest;
2120 uint32_t mtu = dplane_ctx_get_mtu(ctx);
2121 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
2122
2123 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
2124 mtu = nexthop_mtu;
2125
2126 nest = nl_attr_nest(&req->n, datalen, RTA_METRICS);
2127 if (nest == NULL)
2128 return 0;
2129
2130 if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu)))
2131 return 0;
2132 nl_attr_nest_end(&req->n, nest);
2133 }
2134
2135 /*
2136 * Always install blackhole routes without using nexthops, because of
2137 * the following kernel problems:
2138 * 1. Kernel nexthops don't suport unreachable/prohibit route types.
2139 * 2. Blackhole kernel nexthops are deleted when loopback is down.
2140 */
2141 nexthop = dplane_ctx_get_ng(ctx)->nexthop;
2142 if (nexthop) {
2143 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2144 nexthop = nexthop->resolved;
2145
2146 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
2147 switch (nexthop->bh_type) {
2148 case BLACKHOLE_ADMINPROHIB:
2149 req->r.rtm_type = RTN_PROHIBIT;
2150 break;
2151 case BLACKHOLE_REJECT:
2152 req->r.rtm_type = RTN_UNREACHABLE;
2153 break;
2154 default:
2155 req->r.rtm_type = RTN_BLACKHOLE;
2156 break;
2157 }
2158 return NLMSG_ALIGN(req->n.nlmsg_len);
2159 }
2160 }
2161
2162 if ((!fpm && kernel_nexthops_supported()
2163 && (!proto_nexthops_only()
2164 || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0)))
2165 || (fpm && force_nhg)) {
2166 /* Kernel supports nexthop objects */
2167 if (IS_ZEBRA_DEBUG_KERNEL)
2168 zlog_debug("%s: %pFX nhg_id is %u", __func__, p,
2169 dplane_ctx_get_nhe_id(ctx));
2170
2171 if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID,
2172 dplane_ctx_get_nhe_id(ctx)))
2173 return 0;
2174
2175 /* Have to determine src still */
2176 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2177 if (setsrc)
2178 break;
2179
2180 setsrc = nexthop_set_src(nexthop, p->family, &src);
2181 }
2182
2183 if (setsrc) {
2184 if (p->family == AF_INET) {
2185 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2186 &src.ipv4, bytelen))
2187 return 0;
2188 } else if (p->family == AF_INET6) {
2189 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2190 &src.ipv6, bytelen))
2191 return 0;
2192 }
2193 }
2194
2195 return NLMSG_ALIGN(req->n.nlmsg_len);
2196 }
2197
2198 /* Count overall nexthops so we can decide whether to use singlepath
2199 * or multipath case.
2200 */
2201 nexthop_num = 0;
2202 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2203 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2204 continue;
2205 if (!NEXTHOP_IS_ACTIVE(nexthop->flags))
2206 continue;
2207
2208 nexthop_num++;
2209 }
2210
2211 /* Singlepath case. */
2212 if (nexthop_num == 1) {
2213 nexthop_num = 0;
2214 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2215 if (CHECK_FLAG(nexthop->flags,
2216 NEXTHOP_FLAG_RECURSIVE)) {
2217
2218 if (setsrc)
2219 continue;
2220
2221 setsrc = nexthop_set_src(nexthop, p->family,
2222 &src);
2223 continue;
2224 }
2225
2226 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
2227 routedesc = nexthop->rparent
2228 ? "recursive, single-path"
2229 : "single-path";
2230
2231 if (!_netlink_set_tag(&req->n, datalen, tag))
2232 return 0;
2233
2234 if (!_netlink_route_build_singlepath(
2235 p, routedesc, bytelen, nexthop,
2236 &req->n, &req->r, datalen, cmd))
2237 return 0;
2238 nexthop_num++;
2239 break;
2240 }
2241
2242 /*
2243 * Add encapsulation information when installing via
2244 * FPM.
2245 */
2246 if (fpm) {
2247 if (!netlink_route_nexthop_encap(
2248 &req->n, datalen, nexthop))
2249 return 0;
2250 }
2251 }
2252
2253 if (setsrc) {
2254 if (p->family == AF_INET) {
2255 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2256 &src.ipv4, bytelen))
2257 return 0;
2258 } else if (p->family == AF_INET6) {
2259 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2260 &src.ipv6, bytelen))
2261 return 0;
2262 }
2263 }
2264 } else { /* Multipath case */
2265 struct rtattr *nest;
2266 const union g_addr *src1 = NULL;
2267
2268 nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH);
2269 if (nest == NULL)
2270 return 0;
2271
2272 nexthop_num = 0;
2273 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2274 if (CHECK_FLAG(nexthop->flags,
2275 NEXTHOP_FLAG_RECURSIVE)) {
2276 /* This only works for IPv4 now */
2277 if (setsrc)
2278 continue;
2279
2280 setsrc = nexthop_set_src(nexthop, p->family,
2281 &src);
2282 continue;
2283 }
2284
2285 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
2286 routedesc = nexthop->rparent
2287 ? "recursive, multipath"
2288 : "multipath";
2289 nexthop_num++;
2290
2291 if (!_netlink_route_build_multipath(
2292 p, routedesc, bytelen, nexthop,
2293 &req->n, datalen, &req->r, &src1,
2294 tag))
2295 return 0;
2296
2297 if (!setsrc && src1) {
2298 if (p->family == AF_INET)
2299 src.ipv4 = src1->ipv4;
2300 else if (p->family == AF_INET6)
2301 src.ipv6 = src1->ipv6;
2302
2303 setsrc = 1;
2304 }
2305 }
2306 }
2307
2308 nl_attr_nest_end(&req->n, nest);
2309
2310 /*
2311 * Add encapsulation information when installing via
2312 * FPM.
2313 */
2314 if (fpm) {
2315 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
2316 nexthop)) {
2317 if (CHECK_FLAG(nexthop->flags,
2318 NEXTHOP_FLAG_RECURSIVE))
2319 continue;
2320 if (!netlink_route_nexthop_encap(
2321 &req->n, datalen, nexthop))
2322 return 0;
2323 }
2324 }
2325
2326
2327 if (setsrc) {
2328 if (p->family == AF_INET) {
2329 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2330 &src.ipv4, bytelen))
2331 return 0;
2332 } else if (p->family == AF_INET6) {
2333 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2334 &src.ipv6, bytelen))
2335 return 0;
2336 }
2337 if (IS_ZEBRA_DEBUG_KERNEL)
2338 zlog_debug("Setting source");
2339 }
2340 }
2341
2342 /* If there is no useful nexthop then return. */
2343 if (nexthop_num == 0) {
2344 if (IS_ZEBRA_DEBUG_KERNEL)
2345 zlog_debug("%s: No useful nexthop.", __func__);
2346 }
2347
2348 return NLMSG_ALIGN(req->n.nlmsg_len);
2349 }
2350
2351 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
2352 {
2353 uint32_t actual_table;
2354 int suc = 0;
2355 struct mcast_route_data *mr = (struct mcast_route_data *)in;
2356 struct {
2357 struct nlmsghdr n;
2358 struct rtmsg rtm;
2359 char buf[256];
2360 } req;
2361
2362 mroute = mr;
2363 struct zebra_ns *zns;
2364
2365 zns = zvrf->zns;
2366 memset(&req, 0, sizeof(req));
2367
2368 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2369 req.n.nlmsg_flags = NLM_F_REQUEST;
2370 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2371
2372 req.n.nlmsg_type = RTM_GETROUTE;
2373
2374 if (mroute->family == AF_INET) {
2375 req.rtm.rtm_family = RTNL_FAMILY_IPMR;
2376 req.rtm.rtm_dst_len = IPV4_MAX_BITLEN;
2377 req.rtm.rtm_src_len = IPV4_MAX_BITLEN;
2378
2379 nl_attr_put(&req.n, sizeof(req), RTA_SRC,
2380 &mroute->src.ipaddr_v4,
2381 sizeof(mroute->src.ipaddr_v4));
2382 nl_attr_put(&req.n, sizeof(req), RTA_DST,
2383 &mroute->grp.ipaddr_v4,
2384 sizeof(mroute->grp.ipaddr_v4));
2385 } else {
2386 req.rtm.rtm_family = RTNL_FAMILY_IP6MR;
2387 req.rtm.rtm_dst_len = IPV6_MAX_BITLEN;
2388 req.rtm.rtm_src_len = IPV6_MAX_BITLEN;
2389
2390 nl_attr_put(&req.n, sizeof(req), RTA_SRC,
2391 &mroute->src.ipaddr_v6,
2392 sizeof(mroute->src.ipaddr_v6));
2393 nl_attr_put(&req.n, sizeof(req), RTA_DST,
2394 &mroute->grp.ipaddr_v6,
2395 sizeof(mroute->grp.ipaddr_v6));
2396 }
2397
2398 /*
2399 * What?
2400 *
2401 * So during the namespace cleanup we started storing
2402 * the zvrf table_id for the default table as RT_TABLE_MAIN
2403 * which is what the normal routing table for ip routing is.
2404 * This change caused this to break our lookups of sg data
2405 * because prior to this change the zvrf->table_id was 0
2406 * and when the pim multicast kernel code saw a 0,
2407 * it was auto-translated to RT_TABLE_DEFAULT. But since
2408 * we are now passing in RT_TABLE_MAIN there is no auto-translation
2409 * and the kernel goes screw you and the delicious cookies you
2410 * are trying to give me. So now we have this little hack.
2411 */
2412 if (mroute->family == AF_INET)
2413 actual_table = (zvrf->table_id == RT_TABLE_MAIN)
2414 ? RT_TABLE_DEFAULT
2415 : zvrf->table_id;
2416 else
2417 actual_table = zvrf->table_id;
2418
2419 nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table);
2420
2421 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
2422 &zns->netlink_cmd, zns, false);
2423
2424 mroute = NULL;
2425 return suc;
2426 }
2427
2428 /* Char length to debug ID with */
2429 #define ID_LENGTH 10
2430
2431 static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
2432 uint32_t id,
2433 const struct nh_grp *z_grp,
2434 const uint8_t count, bool resilient,
2435 const struct nhg_resilience *nhgr)
2436 {
2437 struct nexthop_grp grp[count];
2438 /* Need space for max group size, "/", and null term */
2439 char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
2440 char buf1[ID_LENGTH + 2];
2441
2442 buf[0] = '\0';
2443
2444 memset(grp, 0, sizeof(grp));
2445
2446 if (count) {
2447 for (int i = 0; i < count; i++) {
2448 grp[i].id = z_grp[i].id;
2449 grp[i].weight = z_grp[i].weight - 1;
2450
2451 if (IS_ZEBRA_DEBUG_KERNEL) {
2452 if (i == 0)
2453 snprintf(buf, sizeof(buf1), "group %u",
2454 grp[i].id);
2455 else {
2456 snprintf(buf1, sizeof(buf1), "/%u",
2457 grp[i].id);
2458 strlcat(buf, buf1, sizeof(buf));
2459 }
2460 }
2461 }
2462 if (!nl_attr_put(n, req_size, NHA_GROUP, grp,
2463 count * sizeof(*grp)))
2464 return false;
2465
2466 if (resilient) {
2467 struct rtattr *nest;
2468
2469 nest = nl_attr_nest(n, req_size, NHA_RES_GROUP);
2470
2471 nl_attr_put16(n, req_size, NHA_RES_GROUP_BUCKETS,
2472 nhgr->buckets);
2473 nl_attr_put32(n, req_size, NHA_RES_GROUP_IDLE_TIMER,
2474 nhgr->idle_timer * 1000);
2475 nl_attr_put32(n, req_size,
2476 NHA_RES_GROUP_UNBALANCED_TIMER,
2477 nhgr->unbalanced_timer * 1000);
2478 nl_attr_nest_end(n, nest);
2479
2480 nl_attr_put16(n, req_size, NHA_GROUP_TYPE,
2481 NEXTHOP_GRP_TYPE_RES);
2482 }
2483 }
2484
2485 if (IS_ZEBRA_DEBUG_KERNEL)
2486 zlog_debug("%s: ID (%u): %s", __func__, id, buf);
2487
2488 return true;
2489 }
2490
2491 /**
2492 * Next hop packet encoding helper function.
2493 *
2494 * \param[in] cmd netlink command.
2495 * \param[in] ctx dataplane context (information snapshot).
2496 * \param[out] buf buffer to hold the packet.
2497 * \param[in] buflen amount of buffer bytes.
2498 *
2499 * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
2500 * otherwise the number of bytes written to buf.
2501 */
2502 ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
2503 const struct zebra_dplane_ctx *ctx,
2504 void *buf, size_t buflen, bool fpm)
2505 {
2506 struct {
2507 struct nlmsghdr n;
2508 struct nhmsg nhm;
2509 char buf[];
2510 } *req = buf;
2511
2512 mpls_lse_t out_lse[MPLS_MAX_LABELS];
2513 char label_buf[256];
2514 int num_labels = 0;
2515 uint32_t id = dplane_ctx_get_nhe_id(ctx);
2516 int type = dplane_ctx_get_nhe_type(ctx);
2517 struct rtattr *nest;
2518 uint16_t encap;
2519 struct nlsock *nl =
2520 kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
2521
2522 if (!id) {
2523 flog_err(
2524 EC_ZEBRA_NHG_FIB_UPDATE,
2525 "Failed trying to update a nexthop group in the kernel that does not have an ID");
2526 return -1;
2527 }
2528
2529 /*
2530 * Nothing to do if the kernel doesn't support nexthop objects or
2531 * we dont want to install this type of NHG, but FPM may possible to
2532 * handle this.
2533 */
2534 if (!fpm && !kernel_nexthops_supported()) {
2535 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2536 zlog_debug(
2537 "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring",
2538 __func__, id, zebra_route_string(type));
2539 return 0;
2540 }
2541
2542 if (proto_nexthops_only() && !is_proto_nhg(id, type)) {
2543 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2544 zlog_debug(
2545 "%s: nhg_id %u (%s): proto-based nexthops only, ignoring",
2546 __func__, id, zebra_route_string(type));
2547 return 0;
2548 }
2549
2550 label_buf[0] = '\0';
2551
2552 if (buflen < sizeof(*req))
2553 return 0;
2554
2555 memset(req, 0, sizeof(*req));
2556
2557 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2558 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2559
2560 if (cmd == RTM_NEWNEXTHOP)
2561 req->n.nlmsg_flags |= NLM_F_REPLACE;
2562
2563 req->n.nlmsg_type = cmd;
2564 req->n.nlmsg_pid = nl->snl.nl_pid;
2565
2566 req->nhm.nh_family = AF_UNSPEC;
2567 /* TODO: Scope? */
2568
2569 if (!nl_attr_put32(&req->n, buflen, NHA_ID, id))
2570 return 0;
2571
2572 if (cmd == RTM_NEWNEXTHOP) {
2573 /*
2574 * We distinguish between a "group", which is a collection
2575 * of ids, and a singleton nexthop with an id. The
2576 * group is installed as an id that just refers to a list of
2577 * other ids.
2578 */
2579 if (dplane_ctx_get_nhe_nh_grp_count(ctx)) {
2580 const struct nexthop_group *nhg;
2581 const struct nhg_resilience *nhgr;
2582
2583 nhg = dplane_ctx_get_nhe_ng(ctx);
2584 nhgr = &nhg->nhgr;
2585 if (!_netlink_nexthop_build_group(
2586 &req->n, buflen, id,
2587 dplane_ctx_get_nhe_nh_grp(ctx),
2588 dplane_ctx_get_nhe_nh_grp_count(ctx),
2589 !!nhgr->buckets, nhgr))
2590 return 0;
2591 } else {
2592 const struct nexthop *nh =
2593 dplane_ctx_get_nhe_ng(ctx)->nexthop;
2594 afi_t afi = dplane_ctx_get_nhe_afi(ctx);
2595
2596 if (afi == AFI_IP)
2597 req->nhm.nh_family = AF_INET;
2598 else if (afi == AFI_IP6)
2599 req->nhm.nh_family = AF_INET6;
2600
2601 switch (nh->type) {
2602 case NEXTHOP_TYPE_IPV4:
2603 case NEXTHOP_TYPE_IPV4_IFINDEX:
2604 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2605 &nh->gate.ipv4,
2606 IPV4_MAX_BYTELEN))
2607 return 0;
2608 break;
2609 case NEXTHOP_TYPE_IPV6:
2610 case NEXTHOP_TYPE_IPV6_IFINDEX:
2611 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2612 &nh->gate.ipv6,
2613 IPV6_MAX_BYTELEN))
2614 return 0;
2615 break;
2616 case NEXTHOP_TYPE_BLACKHOLE:
2617 if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE,
2618 NULL, 0))
2619 return 0;
2620 /* Blackhole shouldn't have anymore attributes
2621 */
2622 goto nexthop_done;
2623 case NEXTHOP_TYPE_IFINDEX:
2624 /* Don't need anymore info for this */
2625 break;
2626 }
2627
2628 if (!nh->ifindex) {
2629 flog_err(
2630 EC_ZEBRA_NHG_FIB_UPDATE,
2631 "Context received for kernel nexthop update without an interface");
2632 return -1;
2633 }
2634
2635 if (!nl_attr_put32(&req->n, buflen, NHA_OIF,
2636 nh->ifindex))
2637 return 0;
2638
2639 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK))
2640 req->nhm.nh_flags |= RTNH_F_ONLINK;
2641
2642 num_labels =
2643 build_label_stack(nh->nh_label, out_lse,
2644 label_buf, sizeof(label_buf));
2645
2646 if (num_labels) {
2647 /* Set the BoS bit */
2648 out_lse[num_labels - 1] |=
2649 htonl(1 << MPLS_LS_S_SHIFT);
2650
2651 /*
2652 * TODO: MPLS unsupported for now in kernel.
2653 */
2654 if (req->nhm.nh_family == AF_MPLS)
2655 goto nexthop_done;
2656
2657 encap = LWTUNNEL_ENCAP_MPLS;
2658 if (!nl_attr_put16(&req->n, buflen,
2659 NHA_ENCAP_TYPE, encap))
2660 return 0;
2661 nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP);
2662 if (!nest)
2663 return 0;
2664 if (!nl_attr_put(
2665 &req->n, buflen, MPLS_IPTUNNEL_DST,
2666 &out_lse,
2667 num_labels * sizeof(mpls_lse_t)))
2668 return 0;
2669
2670 nl_attr_nest_end(&req->n, nest);
2671 }
2672
2673 if (nh->nh_srv6) {
2674 if (nh->nh_srv6->seg6local_action !=
2675 ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
2676 uint32_t action;
2677 uint16_t encap;
2678 struct rtattr *nest;
2679 const struct seg6local_context *ctx;
2680
2681 req->nhm.nh_family = AF_INET6;
2682 action = nh->nh_srv6->seg6local_action;
2683 ctx = &nh->nh_srv6->seg6local_ctx;
2684 encap = LWTUNNEL_ENCAP_SEG6_LOCAL;
2685 if (!nl_attr_put(&req->n, buflen,
2686 NHA_ENCAP_TYPE,
2687 &encap,
2688 sizeof(uint16_t)))
2689 return 0;
2690
2691 nest = nl_attr_nest(&req->n, buflen,
2692 NHA_ENCAP | NLA_F_NESTED);
2693 if (!nest)
2694 return 0;
2695
2696 switch (action) {
2697 case SEG6_LOCAL_ACTION_END:
2698 if (!nl_attr_put32(
2699 &req->n, buflen,
2700 SEG6_LOCAL_ACTION,
2701 SEG6_LOCAL_ACTION_END))
2702 return 0;
2703 break;
2704 case SEG6_LOCAL_ACTION_END_X:
2705 if (!nl_attr_put32(
2706 &req->n, buflen,
2707 SEG6_LOCAL_ACTION,
2708 SEG6_LOCAL_ACTION_END_X))
2709 return 0;
2710 if (!nl_attr_put(
2711 &req->n, buflen,
2712 SEG6_LOCAL_NH6, &ctx->nh6,
2713 sizeof(struct in6_addr)))
2714 return 0;
2715 break;
2716 case SEG6_LOCAL_ACTION_END_T:
2717 if (!nl_attr_put32(
2718 &req->n, buflen,
2719 SEG6_LOCAL_ACTION,
2720 SEG6_LOCAL_ACTION_END_T))
2721 return 0;
2722 if (!nl_attr_put32(
2723 &req->n, buflen,
2724 SEG6_LOCAL_TABLE,
2725 ctx->table))
2726 return 0;
2727 break;
2728 case SEG6_LOCAL_ACTION_END_DX4:
2729 if (!nl_attr_put32(
2730 &req->n, buflen,
2731 SEG6_LOCAL_ACTION,
2732 SEG6_LOCAL_ACTION_END_DX4))
2733 return 0;
2734 if (!nl_attr_put(
2735 &req->n, buflen,
2736 SEG6_LOCAL_NH4, &ctx->nh4,
2737 sizeof(struct in_addr)))
2738 return 0;
2739 break;
2740 case SEG6_LOCAL_ACTION_END_DT6:
2741 if (!nl_attr_put32(
2742 &req->n, buflen,
2743 SEG6_LOCAL_ACTION,
2744 SEG6_LOCAL_ACTION_END_DT6))
2745 return 0;
2746 if (!nl_attr_put32(
2747 &req->n, buflen,
2748 SEG6_LOCAL_TABLE,
2749 ctx->table))
2750 return 0;
2751 break;
2752 case SEG6_LOCAL_ACTION_END_DT4:
2753 if (!nl_attr_put32(
2754 &req->n, buflen,
2755 SEG6_LOCAL_ACTION,
2756 SEG6_LOCAL_ACTION_END_DT4))
2757 return 0;
2758 if (!nl_attr_put32(
2759 &req->n, buflen,
2760 SEG6_LOCAL_VRFTABLE,
2761 ctx->table))
2762 return 0;
2763 break;
2764 case SEG6_LOCAL_ACTION_END_DT46:
2765 if (!nl_attr_put32(
2766 &req->n, buflen,
2767 SEG6_LOCAL_ACTION,
2768 SEG6_LOCAL_ACTION_END_DT46))
2769 return 0;
2770 if (!nl_attr_put32(
2771 &req->n, buflen,
2772 SEG6_LOCAL_VRFTABLE,
2773 ctx->table))
2774 return 0;
2775 break;
2776 default:
2777 zlog_err("%s: unsupport seg6local behaviour action=%u",
2778 __func__, action);
2779 return 0;
2780 }
2781 nl_attr_nest_end(&req->n, nest);
2782 }
2783
2784 if (!sid_zero(&nh->nh_srv6->seg6_segs)) {
2785 char tun_buf[4096];
2786 ssize_t tun_len;
2787 struct rtattr *nest;
2788
2789 if (!nl_attr_put16(&req->n, buflen,
2790 NHA_ENCAP_TYPE,
2791 LWTUNNEL_ENCAP_SEG6))
2792 return 0;
2793 nest = nl_attr_nest(&req->n, buflen,
2794 NHA_ENCAP | NLA_F_NESTED);
2795 if (!nest)
2796 return 0;
2797 tun_len = fill_seg6ipt_encap(tun_buf,
2798 sizeof(tun_buf),
2799 &nh->nh_srv6->seg6_segs);
2800 if (tun_len < 0)
2801 return 0;
2802 if (!nl_attr_put(&req->n, buflen,
2803 SEG6_IPTUNNEL_SRH,
2804 tun_buf, tun_len))
2805 return 0;
2806 nl_attr_nest_end(&req->n, nest);
2807 }
2808 }
2809
2810 nexthop_done:
2811
2812 if (IS_ZEBRA_DEBUG_KERNEL)
2813 zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ",
2814 __func__, id, nh, nh->ifindex,
2815 vrf_id_to_name(nh->vrf_id),
2816 nh->vrf_id, label_buf);
2817 }
2818
2819 req->nhm.nh_protocol = zebra2proto(type);
2820
2821 } else if (cmd != RTM_DELNEXTHOP) {
2822 flog_err(
2823 EC_ZEBRA_NHG_FIB_UPDATE,
2824 "Nexthop group kernel update command (%d) does not exist",
2825 cmd);
2826 return -1;
2827 }
2828
2829 if (IS_ZEBRA_DEBUG_KERNEL)
2830 zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd),
2831 id);
2832
2833 return NLMSG_ALIGN(req->n.nlmsg_len);
2834 }
2835
2836 static ssize_t netlink_nexthop_msg_encoder(struct zebra_dplane_ctx *ctx,
2837 void *buf, size_t buflen)
2838 {
2839 enum dplane_op_e op;
2840 int cmd = 0;
2841
2842 op = dplane_ctx_get_op(ctx);
2843 if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE)
2844 cmd = RTM_NEWNEXTHOP;
2845 else if (op == DPLANE_OP_NH_DELETE)
2846 cmd = RTM_DELNEXTHOP;
2847 else {
2848 flog_err(EC_ZEBRA_NHG_FIB_UPDATE,
2849 "Context received for kernel nexthop update with incorrect OP code (%u)",
2850 op);
2851 return -1;
2852 }
2853
2854 return netlink_nexthop_msg_encode(cmd, ctx, buf, buflen, false);
2855 }
2856
2857 enum netlink_msg_status
2858 netlink_put_nexthop_update_msg(struct nl_batch *bth,
2859 struct zebra_dplane_ctx *ctx)
2860 {
2861 /* Nothing to do if the kernel doesn't support nexthop objects */
2862 if (!kernel_nexthops_supported())
2863 return FRR_NETLINK_SUCCESS;
2864
2865 return netlink_batch_add_msg(bth, ctx, netlink_nexthop_msg_encoder,
2866 false);
2867 }
2868
2869 static ssize_t netlink_newroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2870 void *buf, size_t buflen)
2871 {
2872 return netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, buf,
2873 buflen, false, false);
2874 }
2875
2876 static ssize_t netlink_delroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2877 void *buf, size_t buflen)
2878 {
2879 return netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, buf,
2880 buflen, false, false);
2881 }
2882
2883 enum netlink_msg_status
2884 netlink_put_route_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
2885 {
2886 int cmd;
2887 const struct prefix *p = dplane_ctx_get_dest(ctx);
2888
2889 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
2890 cmd = RTM_DELROUTE;
2891 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
2892 cmd = RTM_NEWROUTE;
2893 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
2894
2895 if (p->family == AF_INET || v6_rr_semantics) {
2896 /* Single 'replace' operation */
2897
2898 /*
2899 * With route replace semantics in place
2900 * for v4 routes and the new route is a system
2901 * route we do not install anything.
2902 * The problem here is that the new system
2903 * route should cause us to withdraw from
2904 * the kernel the old non-system route
2905 */
2906 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))
2907 && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2908 return netlink_batch_add_msg(
2909 bth, ctx, netlink_delroute_msg_encoder,
2910 true);
2911 } else {
2912 /*
2913 * So v6 route replace semantics are not in
2914 * the kernel at this point as I understand it.
2915 * so let's do a delete then an add.
2916 * In the future once v6 route replace semantics
2917 * are in we can figure out what to do here to
2918 * allow working with old and new kernels.
2919 *
2920 * I'm also intentionally ignoring the failure case
2921 * of the route delete. If that happens yeah we're
2922 * screwed.
2923 */
2924 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2925 netlink_batch_add_msg(
2926 bth, ctx, netlink_delroute_msg_encoder,
2927 true);
2928 }
2929
2930 cmd = RTM_NEWROUTE;
2931 } else
2932 return FRR_NETLINK_ERROR;
2933
2934 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
2935 return FRR_NETLINK_SUCCESS;
2936
2937 return netlink_batch_add_msg(bth, ctx,
2938 cmd == RTM_NEWROUTE
2939 ? netlink_newroute_msg_encoder
2940 : netlink_delroute_msg_encoder,
2941 false);
2942 }
2943
2944 /**
2945 * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop
2946 *
2947 * @tb: Netlink RTA data
2948 * @family: Address family in the nhmsg
2949 * @ifp: Interface connected - this should be NULL, we fill it in
2950 * @ns_id: Namspace id
2951 *
2952 * Return: New nexthop
2953 */
2954 static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
2955 unsigned char family,
2956 struct interface **ifp,
2957 ns_id_t ns_id)
2958 {
2959 struct nexthop nh = {};
2960 void *gate = NULL;
2961 enum nexthop_types_t type = 0;
2962 int if_index = 0;
2963 size_t sz = 0;
2964 struct interface *ifp_lookup;
2965
2966 if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
2967
2968
2969 if (tb[NHA_GATEWAY]) {
2970 switch (family) {
2971 case AF_INET:
2972 type = NEXTHOP_TYPE_IPV4_IFINDEX;
2973 sz = 4;
2974 break;
2975 case AF_INET6:
2976 type = NEXTHOP_TYPE_IPV6_IFINDEX;
2977 sz = 16;
2978 break;
2979 default:
2980 flog_warn(
2981 EC_ZEBRA_BAD_NHG_MESSAGE,
2982 "Nexthop gateway with bad address family (%d) received from kernel",
2983 family);
2984 return nh;
2985 }
2986 gate = RTA_DATA(tb[NHA_GATEWAY]);
2987 } else
2988 type = NEXTHOP_TYPE_IFINDEX;
2989
2990 if (type)
2991 nh.type = type;
2992
2993 if (gate)
2994 memcpy(&(nh.gate), gate, sz);
2995
2996 if (if_index)
2997 nh.ifindex = if_index;
2998
2999 ifp_lookup =
3000 if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
3001
3002 if (ifp)
3003 *ifp = ifp_lookup;
3004 if (ifp_lookup)
3005 nh.vrf_id = ifp_lookup->vrf->vrf_id;
3006 else {
3007 flog_warn(
3008 EC_ZEBRA_UNKNOWN_INTERFACE,
3009 "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
3010 __func__, nh.ifindex);
3011
3012 nh.vrf_id = VRF_DEFAULT;
3013 }
3014
3015 if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
3016 uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
3017 int num_labels = 0;
3018
3019 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
3020
3021 if (encap_type == LWTUNNEL_ENCAP_MPLS)
3022 num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
3023
3024 if (num_labels)
3025 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
3026 labels);
3027 }
3028
3029 return nh;
3030 }
3031
3032 static int netlink_nexthop_process_group(struct rtattr **tb,
3033 struct nh_grp *z_grp, int z_grp_size,
3034 struct nhg_resilience *nhgr)
3035 {
3036 uint8_t count = 0;
3037 /* linux/nexthop.h group struct */
3038 struct nexthop_grp *n_grp = NULL;
3039
3040 n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
3041 count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
3042
3043 if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
3044 flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
3045 "Invalid nexthop group received from the kernel");
3046 return count;
3047 }
3048
3049 for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
3050 z_grp[i].id = n_grp[i].id;
3051 z_grp[i].weight = n_grp[i].weight + 1;
3052 }
3053
3054 memset(nhgr, 0, sizeof(*nhgr));
3055 if (tb[NHA_RES_GROUP]) {
3056 struct rtattr *tbn[NHA_RES_GROUP_MAX + 1];
3057 struct rtattr *rta;
3058 struct rtattr *res_group = tb[NHA_RES_GROUP];
3059
3060 netlink_parse_rtattr_nested(tbn, NHA_RES_GROUP_MAX, res_group);
3061
3062 if (tbn[NHA_RES_GROUP_BUCKETS]) {
3063 rta = tbn[NHA_RES_GROUP_BUCKETS];
3064 nhgr->buckets = *(uint16_t *)RTA_DATA(rta);
3065 }
3066
3067 if (tbn[NHA_RES_GROUP_IDLE_TIMER]) {
3068 rta = tbn[NHA_RES_GROUP_IDLE_TIMER];
3069 nhgr->idle_timer = *(uint32_t *)RTA_DATA(rta);
3070 }
3071
3072 if (tbn[NHA_RES_GROUP_UNBALANCED_TIMER]) {
3073 rta = tbn[NHA_RES_GROUP_UNBALANCED_TIMER];
3074 nhgr->unbalanced_timer = *(uint32_t *)RTA_DATA(rta);
3075 }
3076
3077 if (tbn[NHA_RES_GROUP_UNBALANCED_TIME]) {
3078 rta = tbn[NHA_RES_GROUP_UNBALANCED_TIME];
3079 nhgr->unbalanced_time = *(uint64_t *)RTA_DATA(rta);
3080 }
3081 }
3082
3083 return count;
3084 }
3085
3086 /**
3087 * netlink_nexthop_change() - Read in change about nexthops from the kernel
3088 *
3089 * @h: Netlink message header
3090 * @ns_id: Namspace id
3091 * @startup: Are we reading under startup conditions?
3092 *
3093 * Return: Result status
3094 */
3095 int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
3096 {
3097 int len;
3098 /* nexthop group id */
3099 uint32_t id;
3100 unsigned char family;
3101 int type;
3102 afi_t afi = AFI_UNSPEC;
3103 vrf_id_t vrf_id = VRF_DEFAULT;
3104 struct interface *ifp = NULL;
3105 struct nhmsg *nhm = NULL;
3106 struct nexthop nh = {};
3107 struct nh_grp grp[MULTIPATH_NUM] = {};
3108 /* Count of nexthops in group array */
3109 uint8_t grp_count = 0;
3110 struct rtattr *tb[NHA_MAX + 1] = {};
3111
3112 frrtrace(3, frr_zebra, netlink_nexthop_change, h, ns_id, startup);
3113
3114 nhm = NLMSG_DATA(h);
3115
3116 if (ns_id)
3117 vrf_id = ns_id;
3118
3119 if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
3120 return 0;
3121
3122 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
3123 if (len < 0) {
3124 zlog_warn(
3125 "%s: Message received from netlink is of a broken size %d %zu",
3126 __func__, h->nlmsg_len,
3127 (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
3128 return -1;
3129 }
3130
3131 netlink_parse_rtattr_flags(tb, NHA_MAX, RTM_NHA(nhm), len,
3132 NLA_F_NESTED);
3133
3134
3135 if (!tb[NHA_ID]) {
3136 flog_warn(
3137 EC_ZEBRA_BAD_NHG_MESSAGE,
3138 "Nexthop group without an ID received from the kernel");
3139 return -1;
3140 }
3141
3142 /* We use the ID key'd nhg table for kernel updates */
3143 id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
3144
3145 if (zebra_evpn_mh_is_fdb_nh(id)) {
3146 /* If this is a L2 NH just ignore it */
3147 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3148 zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x",
3149 h->nlmsg_type, id);
3150 }
3151 return 0;
3152 }
3153
3154 family = nhm->nh_family;
3155 afi = family2afi(family);
3156
3157 type = proto2zebra(nhm->nh_protocol, 0, true);
3158
3159 if (IS_ZEBRA_DEBUG_KERNEL)
3160 zlog_debug("%s ID (%u) %s NS %u",
3161 nl_msg_type_to_str(h->nlmsg_type), id,
3162 nl_family_to_str(family), ns_id);
3163
3164
3165 if (h->nlmsg_type == RTM_NEWNEXTHOP) {
3166 struct nhg_resilience nhgr = {};
3167
3168 if (tb[NHA_GROUP]) {
3169 /**
3170 * If this is a group message its only going to have
3171 * an array of nexthop IDs associated with it
3172 */
3173 grp_count = netlink_nexthop_process_group(
3174 tb, grp, array_size(grp), &nhgr);
3175 } else {
3176 if (tb[NHA_BLACKHOLE]) {
3177 /**
3178 * This nexthop is just for blackhole-ing
3179 * traffic, it should not have an OIF, GATEWAY,
3180 * or ENCAP
3181 */
3182 nh.type = NEXTHOP_TYPE_BLACKHOLE;
3183 nh.bh_type = BLACKHOLE_UNSPEC;
3184 } else if (tb[NHA_OIF])
3185 /**
3186 * This is a true new nexthop, so we need
3187 * to parse the gateway and device info
3188 */
3189 nh = netlink_nexthop_process_nh(tb, family,
3190 &ifp, ns_id);
3191 else {
3192
3193 flog_warn(
3194 EC_ZEBRA_BAD_NHG_MESSAGE,
3195 "Invalid Nexthop message received from the kernel with ID (%u)",
3196 id);
3197 return -1;
3198 }
3199 SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
3200 if (nhm->nh_flags & RTNH_F_ONLINK)
3201 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
3202 vrf_id = nh.vrf_id;
3203 }
3204
3205 if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
3206 type, startup, &nhgr))
3207 return -1;
3208
3209 } else if (h->nlmsg_type == RTM_DELNEXTHOP)
3210 zebra_nhg_kernel_del(id, vrf_id);
3211
3212 return 0;
3213 }
3214
3215 /**
3216 * netlink_request_nexthop() - Request nextop information from the kernel
3217 * @zns: Zebra namespace
3218 * @family: AF_* netlink family
3219 * @type: RTM_* route type
3220 *
3221 * Return: Result status
3222 */
3223 static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
3224 {
3225 struct {
3226 struct nlmsghdr n;
3227 struct nhmsg nhm;
3228 } req;
3229
3230 /* Form the request, specifying filter (rtattr) if needed. */
3231 memset(&req, 0, sizeof(req));
3232 req.n.nlmsg_type = type;
3233 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3234 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3235 req.nhm.nh_family = family;
3236
3237 return netlink_request(&zns->netlink_cmd, &req);
3238 }
3239
3240
3241 /**
3242 * netlink_nexthop_read() - Nexthop read function using netlink interface
3243 *
3244 * @zns: Zebra name space
3245 *
3246 * Return: Result status
3247 * Only called at bootstrap time.
3248 */
3249 int netlink_nexthop_read(struct zebra_ns *zns)
3250 {
3251 int ret;
3252 struct zebra_dplane_info dp_info;
3253
3254 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3255
3256 /* Get nexthop objects */
3257 ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
3258 if (ret < 0)
3259 return ret;
3260 ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
3261 &dp_info, 0, true);
3262
3263 if (!ret)
3264 /* If we succesfully read in nexthop objects,
3265 * this kernel must support them.
3266 */
3267 supports_nh = true;
3268 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
3269 zlog_debug("Nexthop objects %ssupported on this kernel",
3270 supports_nh ? "" : "not ");
3271
3272 zebra_router_set_supports_nhgs(supports_nh);
3273
3274 return ret;
3275 }
3276
3277
3278 int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen,
3279 ns_id_t ns_id, uint8_t family, bool permanent)
3280 {
3281 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
3282 addr, lla, llalen, ns_id, family, permanent,
3283 RTPROT_ZEBRA);
3284 }
3285
3286 /**
3287 * netlink_neigh_update_msg_encode() - Common helper api for encoding
3288 * evpn neighbor update as netlink messages using dataplane context object.
3289 * Here, a neighbor refers to a bridge forwarding database entry for
3290 * either unicast forwarding or head-end replication or an IP neighbor
3291 * entry.
3292 * @ctx: Dataplane context
3293 * @cmd: Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH)
3294 * @lla: A pointer to neighbor cache link layer address
3295 * @llalen: Length of the pointer to neighbor cache link layer
3296 * address
3297 * @ip: A neighbor cache n/w layer destination address
3298 * In the case of bridge FDB, this represnts the remote
3299 * VTEP IP.
3300 * @replace_obj: Whether NEW request should replace existing object or
3301 * add to the end of the list
3302 * @family: AF_* netlink family
3303 * @type: RTN_* route type
3304 * @flags: NTF_* flags
3305 * @state: NUD_* states
3306 * @data: data buffer pointer
3307 * @datalen: total amount of data buffer space
3308 * @protocol: protocol information
3309 *
3310 * Return: 0 when the msg doesn't fit entirely in the buffer
3311 * otherwise the number of bytes written to buf.
3312 */
3313 static ssize_t netlink_neigh_update_msg_encode(
3314 const struct zebra_dplane_ctx *ctx, int cmd, const void *lla,
3315 int llalen, const struct ipaddr *ip, bool replace_obj, uint8_t family,
3316 uint8_t type, uint8_t flags, uint16_t state, uint32_t nhg_id, bool nfy,
3317 uint8_t nfy_flags, bool ext, uint32_t ext_flags, void *data,
3318 size_t datalen, uint8_t protocol)
3319 {
3320 struct {
3321 struct nlmsghdr n;
3322 struct ndmsg ndm;
3323 char buf[];
3324 } *req = data;
3325 int ipa_len;
3326 enum dplane_op_e op;
3327
3328 if (datalen < sizeof(*req))
3329 return 0;
3330 memset(req, 0, sizeof(*req));
3331
3332 op = dplane_ctx_get_op(ctx);
3333
3334 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3335 req->n.nlmsg_flags = NLM_F_REQUEST;
3336 if (cmd == RTM_NEWNEIGH)
3337 req->n.nlmsg_flags |=
3338 NLM_F_CREATE
3339 | (replace_obj ? NLM_F_REPLACE : NLM_F_APPEND);
3340 req->n.nlmsg_type = cmd;
3341 req->ndm.ndm_family = family;
3342 req->ndm.ndm_type = type;
3343 req->ndm.ndm_state = state;
3344 req->ndm.ndm_flags = flags;
3345 req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
3346
3347 if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol,
3348 sizeof(protocol)))
3349 return 0;
3350
3351 if (lla) {
3352 if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, lla, llalen))
3353 return 0;
3354 }
3355
3356 if (nfy) {
3357 struct rtattr *nest;
3358
3359 nest = nl_attr_nest(&req->n, datalen,
3360 NDA_FDB_EXT_ATTRS | NLA_F_NESTED);
3361 if (!nest)
3362 return 0;
3363
3364 if (!nl_attr_put(&req->n, datalen, NFEA_ACTIVITY_NOTIFY,
3365 &nfy_flags, sizeof(nfy_flags)))
3366 return 0;
3367 if (!nl_attr_put(&req->n, datalen, NFEA_DONT_REFRESH, NULL, 0))
3368 return 0;
3369
3370 nl_attr_nest_end(&req->n, nest);
3371 }
3372
3373
3374 if (ext) {
3375 if (!nl_attr_put(&req->n, datalen, NDA_EXT_FLAGS, &ext_flags,
3376 sizeof(ext_flags)))
3377 return 0;
3378 }
3379
3380 if (nhg_id) {
3381 if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id))
3382 return 0;
3383 } else {
3384 ipa_len =
3385 IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
3386 if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr,
3387 ipa_len))
3388 return 0;
3389 }
3390
3391 if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) {
3392 vlanid_t vid = dplane_ctx_mac_get_vlan(ctx);
3393
3394 if (vid > 0) {
3395 if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid))
3396 return 0;
3397 }
3398
3399 if (!nl_attr_put32(&req->n, datalen, NDA_MASTER,
3400 dplane_ctx_mac_get_br_ifindex(ctx)))
3401 return 0;
3402 }
3403
3404 return NLMSG_ALIGN(req->n.nlmsg_len);
3405 }
3406
3407 /*
3408 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
3409 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
3410 */
3411 static ssize_t
3412 netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd,
3413 void *buf, size_t buflen)
3414 {
3415 struct ethaddr dst_mac = {.octet = {0}};
3416 int proto = RTPROT_ZEBRA;
3417
3418 if (dplane_ctx_get_type(ctx) != 0)
3419 proto = zebra2proto(dplane_ctx_get_type(ctx));
3420
3421 return netlink_neigh_update_msg_encode(
3422 ctx, cmd, (const void *)&dst_mac, ETH_ALEN,
3423 dplane_ctx_neigh_get_ipaddr(ctx), false, PF_BRIDGE, 0, NTF_SELF,
3424 (NUD_NOARP | NUD_PERMANENT), 0 /*nhg*/, false /*nfy*/,
3425 0 /*nfy_flags*/, false /*ext*/, 0 /*ext_flags*/, buf, buflen,
3426 proto);
3427 }
3428
/*
 * Fallback for headers that do not provide NDA_RTA: given a pointer to a
 * struct ndmsg, yield a pointer to the first rtattr, which starts at the
 * NLMSG_ALIGN'ed end of the ndmsg header.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif /* NDA_RTA */
3433
3434 static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
3435 {
3436 struct ndmsg *ndm;
3437 struct interface *ifp;
3438 struct zebra_if *zif;
3439 struct rtattr *tb[NDA_MAX + 1];
3440 struct interface *br_if;
3441 struct ethaddr mac;
3442 vlanid_t vid = 0;
3443 struct in_addr vtep_ip;
3444 int vid_present = 0, dst_present = 0;
3445 char vid_buf[20];
3446 char dst_buf[30];
3447 bool sticky;
3448 bool local_inactive = false;
3449 bool dp_static = false;
3450 uint32_t nhg_id = 0;
3451
3452 ndm = NLMSG_DATA(h);
3453
3454 /* We only process macfdb notifications if EVPN is enabled */
3455 if (!is_evpn_enabled())
3456 return 0;
3457
3458 /* Parse attributes and extract fields of interest. Do basic
3459 * validation of the fields.
3460 */
3461 netlink_parse_rtattr_flags(tb, NDA_MAX, NDA_RTA(ndm), len,
3462 NLA_F_NESTED);
3463
3464 if (!tb[NDA_LLADDR]) {
3465 if (IS_ZEBRA_DEBUG_KERNEL)
3466 zlog_debug("%s AF_BRIDGE IF %u - no LLADDR",
3467 nl_msg_type_to_str(h->nlmsg_type),
3468 ndm->ndm_ifindex);
3469 return 0;
3470 }
3471
3472 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
3473 if (IS_ZEBRA_DEBUG_KERNEL)
3474 zlog_debug(
3475 "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu",
3476 nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex,
3477 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
3478 return 0;
3479 }
3480
3481 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
3482
3483 if (tb[NDA_VLAN]) {
3484 vid_present = 1;
3485 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
3486 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
3487 }
3488
3489 if (tb[NDA_DST]) {
3490 /* TODO: Only IPv4 supported now. */
3491 dst_present = 1;
3492 memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]),
3493 IPV4_MAX_BYTELEN);
3494 snprintfrr(dst_buf, sizeof(dst_buf), " dst %pI4",
3495 &vtep_ip);
3496 }
3497
3498 if (tb[NDA_NH_ID])
3499 nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]);
3500
3501 if (ndm->ndm_state & NUD_STALE)
3502 local_inactive = true;
3503
3504 if (tb[NDA_FDB_EXT_ATTRS]) {
3505 struct rtattr *attr = tb[NDA_FDB_EXT_ATTRS];
3506 struct rtattr *nfea_tb[NFEA_MAX + 1] = {0};
3507
3508 netlink_parse_rtattr_nested(nfea_tb, NFEA_MAX, attr);
3509 if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) {
3510 uint8_t nfy_flags;
3511
3512 nfy_flags = *(uint8_t *)RTA_DATA(
3513 nfea_tb[NFEA_ACTIVITY_NOTIFY]);
3514 if (nfy_flags & FDB_NOTIFY_BIT)
3515 dp_static = true;
3516 if (nfy_flags & FDB_NOTIFY_INACTIVE_BIT)
3517 local_inactive = true;
3518 }
3519 }
3520
3521 if (IS_ZEBRA_DEBUG_KERNEL)
3522 zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %pEA%s nhg %d",
3523 nl_msg_type_to_str(h->nlmsg_type),
3524 ndm->ndm_ifindex, vid_present ? vid_buf : "",
3525 ndm->ndm_state, ndm->ndm_flags, &mac,
3526 dst_present ? dst_buf : "", nhg_id);
3527
3528 /* The interface should exist. */
3529 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3530 ndm->ndm_ifindex);
3531 if (!ifp || !ifp->info)
3532 return 0;
3533
3534 /* The interface should be something we're interested in. */
3535 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
3536 return 0;
3537
3538 zif = (struct zebra_if *)ifp->info;
3539 if ((br_if = zif->brslave_info.br_if) == NULL) {
3540 if (IS_ZEBRA_DEBUG_KERNEL)
3541 zlog_debug(
3542 "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master",
3543 nl_msg_type_to_str(h->nlmsg_type), ifp->name,
3544 ndm->ndm_ifindex,
3545 zif->brslave_info.bridge_ifindex);
3546 return 0;
3547 }
3548
3549 sticky = !!(ndm->ndm_flags & NTF_STICKY);
3550
3551 if (filter_vlan && vid != filter_vlan) {
3552 if (IS_ZEBRA_DEBUG_KERNEL)
3553 zlog_debug(" Filtered due to filter vlan: %d",
3554 filter_vlan);
3555 return 0;
3556 }
3557
3558 /* If add or update, do accordingly if learnt on a "local" interface; if
3559 * the notification is over VxLAN, this has to be related to
3560 * multi-homing,
3561 * so perform an implicit delete of any local entry (if it exists).
3562 */
3563 if (h->nlmsg_type == RTM_NEWNEIGH) {
3564 /* Drop "permanent" entries. */
3565 if (ndm->ndm_state & NUD_PERMANENT) {
3566 if (IS_ZEBRA_DEBUG_KERNEL)
3567 zlog_debug(
3568 " Dropping entry because of NUD_PERMANENT");
3569 return 0;
3570 }
3571
3572 if (IS_ZEBRA_IF_VXLAN(ifp))
3573 return zebra_vxlan_dp_network_mac_add(
3574 ifp, br_if, &mac, vid, nhg_id, sticky,
3575 !!(ndm->ndm_flags & NTF_EXT_LEARNED));
3576
3577 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
3578 sticky, local_inactive, dp_static);
3579 }
3580
3581 /* This is a delete notification.
3582 * Ignore the notification with IP dest as it may just signify that the
3583 * MAC has moved from remote to local. The exception is the special
3584 * all-zeros MAC that represents the BUM flooding entry; we may have
3585 * to readd it. Otherwise,
3586 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
3587 * 2. For a MAC over "local" interface, delete the mac
3588 * Note: We will get notifications from both bridge driver and VxLAN
3589 * driver.
3590 */
3591 if (nhg_id)
3592 return 0;
3593
3594 if (dst_present) {
3595 u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
3596
3597 if (!memcmp(zero_mac, mac.octet, ETH_ALEN))
3598 return zebra_vxlan_check_readd_vtep(ifp, vtep_ip);
3599 return 0;
3600 }
3601
3602 if (IS_ZEBRA_IF_VXLAN(ifp))
3603 return zebra_vxlan_dp_network_mac_del(ifp, br_if, &mac, vid);
3604
3605 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
3606 }
3607
3608 static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
3609 {
3610 int len;
3611 struct ndmsg *ndm;
3612
3613 if (h->nlmsg_type != RTM_NEWNEIGH)
3614 return 0;
3615
3616 /* Length validity. */
3617 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3618 if (len < 0)
3619 return -1;
3620
3621 /* We are interested only in AF_BRIDGE notifications. */
3622 ndm = NLMSG_DATA(h);
3623 if (ndm->ndm_family != AF_BRIDGE)
3624 return 0;
3625
3626 return netlink_macfdb_change(h, len, ns_id);
3627 }
3628
3629 /* Request for MAC FDB information from the kernel */
3630 static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
3631 int type, ifindex_t master_ifindex)
3632 {
3633 struct {
3634 struct nlmsghdr n;
3635 struct ifinfomsg ifm;
3636 char buf[256];
3637 } req;
3638
3639 /* Form the request, specifying filter (rtattr) if needed. */
3640 memset(&req, 0, sizeof(req));
3641 req.n.nlmsg_type = type;
3642 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3643 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
3644 req.ifm.ifi_family = family;
3645 if (master_ifindex)
3646 nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
3647
3648 return netlink_request(netlink_cmd, &req);
3649 }
3650
3651 /*
3652 * MAC forwarding database read using netlink interface. This is invoked
3653 * at startup.
3654 */
3655 int netlink_macfdb_read(struct zebra_ns *zns)
3656 {
3657 int ret;
3658 struct zebra_dplane_info dp_info;
3659
3660 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3661
3662 /* Get bridge FDB table. */
3663 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3664 0);
3665 if (ret < 0)
3666 return ret;
3667 /* We are reading entire table. */
3668 filter_vlan = 0;
3669 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3670 &dp_info, 0, true);
3671
3672 return ret;
3673 }
3674
3675 /*
3676 * MAC forwarding database read using netlink interface. This is for a
3677 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
3678 */
3679 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
3680 struct interface *br_if)
3681 {
3682 struct zebra_if *br_zif;
3683 struct zebra_if *zif;
3684 struct zebra_l2info_vxlan *vxl;
3685 struct zebra_dplane_info dp_info;
3686 int ret = 0;
3687
3688 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3689
3690 /* Save VLAN we're filtering on, if needed. */
3691 br_zif = (struct zebra_if *)br_if->info;
3692 zif = (struct zebra_if *)ifp->info;
3693 vxl = &zif->l2info.vxl;
3694 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
3695 filter_vlan = vxl->access_vlan;
3696
3697 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3698 */
3699 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3700 br_if->ifindex);
3701 if (ret < 0)
3702 return ret;
3703 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3704 &dp_info, 0, false);
3705
3706 /* Reset VLAN filter. */
3707 filter_vlan = 0;
3708 return ret;
3709 }
3710
3711
3712 /* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
3713 static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
3714 int family, int type,
3715 struct interface *br_if,
3716 const struct ethaddr *mac,
3717 vlanid_t vid)
3718 {
3719 struct {
3720 struct nlmsghdr n;
3721 struct ndmsg ndm;
3722 char buf[256];
3723 } req;
3724 struct zebra_if *br_zif;
3725
3726 memset(&req, 0, sizeof(req));
3727 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3728 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3729 req.n.nlmsg_flags = NLM_F_REQUEST;
3730 req.ndm.ndm_family = family; /* AF_BRIDGE */
3731 /* req.ndm.ndm_state = NUD_REACHABLE; */
3732
3733 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
3734
3735 br_zif = (struct zebra_if *)br_if->info;
3736 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
3737 nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid);
3738
3739 nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
3740
3741 if (IS_ZEBRA_DEBUG_KERNEL)
3742 zlog_debug(
3743 "%s: Tx family %s IF %s(%u) vrf %s(%u) MAC %pEA vid %u",
3744 __func__, nl_family_to_str(req.ndm.ndm_family),
3745 br_if->name, br_if->ifindex, br_if->vrf->name,
3746 br_if->vrf->vrf_id, mac, vid);
3747
3748 return netlink_request(&zns->netlink_cmd, &req);
3749 }
3750
3751 int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
3752 struct interface *br_if,
3753 const struct ethaddr *mac, vlanid_t vid)
3754 {
3755 int ret = 0;
3756 struct zebra_dplane_info dp_info;
3757
3758 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3759
3760 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3761 */
3762 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
3763 RTM_GETNEIGH,
3764 br_if, mac, vid);
3765 if (ret < 0)
3766 return ret;
3767
3768 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3769 &dp_info, 1, false);
3770
3771 return ret;
3772 }
3773
3774 /*
3775 * Netlink-specific handler for MAC updates using dataplane context object.
3776 */
3777 ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, void *data,
3778 size_t datalen)
3779 {
3780 struct ipaddr vtep_ip;
3781 vlanid_t vid;
3782 ssize_t total;
3783 int cmd;
3784 uint8_t flags;
3785 uint16_t state;
3786 uint32_t nhg_id;
3787 uint32_t update_flags;
3788 bool nfy = false;
3789 uint8_t nfy_flags = 0;
3790 int proto = RTPROT_ZEBRA;
3791
3792 if (dplane_ctx_get_type(ctx) != 0)
3793 proto = zebra2proto(dplane_ctx_get_type(ctx));
3794
3795 cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL
3796 ? RTM_NEWNEIGH : RTM_DELNEIGH;
3797
3798 flags = NTF_MASTER;
3799 state = NUD_REACHABLE;
3800
3801 update_flags = dplane_ctx_mac_get_update_flags(ctx);
3802 if (update_flags & DPLANE_MAC_REMOTE) {
3803 flags |= NTF_SELF;
3804 if (dplane_ctx_mac_is_sticky(ctx)) {
3805 /* NUD_NOARP prevents the entry from expiring */
3806 state |= NUD_NOARP;
3807 /* sticky the entry from moving */
3808 flags |= NTF_STICKY;
3809 } else {
3810 flags |= NTF_EXT_LEARNED;
3811 }
3812 /* if it was static-local previously we need to clear the
3813 * notify flags on replace with remote
3814 */
3815 if (update_flags & DPLANE_MAC_WAS_STATIC)
3816 nfy = true;
3817 } else {
3818 /* local mac */
3819 if (update_flags & DPLANE_MAC_SET_STATIC) {
3820 nfy_flags |= FDB_NOTIFY_BIT;
3821 state |= NUD_NOARP;
3822 }
3823
3824 if (update_flags & DPLANE_MAC_SET_INACTIVE)
3825 nfy_flags |= FDB_NOTIFY_INACTIVE_BIT;
3826
3827 nfy = true;
3828 }
3829
3830 nhg_id = dplane_ctx_mac_get_nhg_id(ctx);
3831 vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx));
3832 SET_IPADDR_V4(&vtep_ip);
3833
3834 if (IS_ZEBRA_DEBUG_KERNEL) {
3835 char vid_buf[20];
3836 const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
3837
3838 vid = dplane_ctx_mac_get_vlan(ctx);
3839 if (vid > 0)
3840 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
3841 else
3842 vid_buf[0] = '\0';
3843
3844 zlog_debug(
3845 "Tx %s family %s IF %s(%u)%s %sMAC %pEA dst %pIA nhg %u%s%s%s%s%s",
3846 nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE),
3847 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
3848 vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
3849 mac, &vtep_ip, nhg_id,
3850 (update_flags & DPLANE_MAC_REMOTE) ? " rem" : "",
3851 (update_flags & DPLANE_MAC_WAS_STATIC) ? " clr_sync"
3852 : "",
3853 (update_flags & DPLANE_MAC_SET_STATIC) ? " static" : "",
3854 (update_flags & DPLANE_MAC_SET_INACTIVE) ? " inactive"
3855 : "",
3856 nfy ? " nfy" : "");
3857 }
3858
3859 total = netlink_neigh_update_msg_encode(
3860 ctx, cmd, (const void *)dplane_ctx_mac_get_addr(ctx), ETH_ALEN,
3861 &vtep_ip, true, AF_BRIDGE, 0, flags, state, nhg_id, nfy,
3862 nfy_flags, false /*ext*/, 0 /*ext_flags*/, data, datalen,
3863 proto);
3864
3865 return total;
3866 }
3867
3868 /*
3869 * In the event the kernel deletes ipv4 link-local neighbor entries created for
3870 * 5549 support, re-install them.
3871 */
3872 static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
3873 struct interface *ifp, struct ipaddr *ip,
3874 bool handle_failed)
3875 {
3876 if (ndm->ndm_family != AF_INET)
3877 return;
3878
3879 if (!zif->v6_2_v4_ll_neigh_entry)
3880 return;
3881
3882 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
3883 return;
3884
3885 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
3886 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
3887 ifp->name);
3888 return;
3889 }
3890
3891 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
3892 }
3893
/* Neighbor states treated as locally active. */
#define NUD_LOCAL_ACTIVE (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE)

/* Neighbor states considered valid for use: the locally-active states plus
 * PROBE, STALE and DELAY.
 */
#define NUD_VALID (NUD_LOCAL_ACTIVE | NUD_PROBE | NUD_STALE | NUD_DELAY)
3899
/*
 * Translate a netlink neighbor state into the zclient neighbor state.
 *
 * The two value sets match exactly:
 *  - netlink neighbor state values: NUD_XXX (see linux/neighbour.h)
 *  - zclient neighbor state values: ZEBRA_NEIGH_STATE_XXX
 *    (see lib/zclient.h)
 * so the value is passed through unchanged.
 */
static int netlink_nbr_entry_state_to_zclient(int nbr_state)
{
	const int zclient_state = nbr_state;

	return zclient_state;
}
3909 static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
3910 {
3911 struct ndmsg *ndm;
3912 struct interface *ifp;
3913 struct zebra_if *zif;
3914 struct rtattr *tb[NDA_MAX + 1];
3915 struct interface *link_if;
3916 struct ethaddr mac;
3917 struct ipaddr ip;
3918 char buf[ETHER_ADDR_STRLEN];
3919 int mac_present = 0;
3920 bool is_ext;
3921 bool is_router;
3922 bool local_inactive;
3923 uint32_t ext_flags = 0;
3924 bool dp_static = false;
3925 int l2_len = 0;
3926 int cmd;
3927
3928 ndm = NLMSG_DATA(h);
3929
3930 /* The interface should exist. */
3931 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3932 ndm->ndm_ifindex);
3933 if (!ifp || !ifp->info)
3934 return 0;
3935
3936 zif = (struct zebra_if *)ifp->info;
3937
3938 /* Parse attributes and extract fields of interest. */
3939 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
3940
3941 if (!tb[NDA_DST]) {
3942 zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST",
3943 nl_msg_type_to_str(h->nlmsg_type),
3944 nl_family_to_str(ndm->ndm_family), ifp->name,
3945 ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id);
3946 return 0;
3947 }
3948
3949 memset(&ip, 0, sizeof(ip));
3950 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
3951 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
3952
3953 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
3954 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
3955 netlink_handle_5549(ndm, zif, ifp, &ip, false);
3956 if (IS_ZEBRA_DEBUG_KERNEL)
3957 zlog_debug(
3958 " Neighbor Entry Received is a 5549 entry, finished");
3959 return 0;
3960 }
3961
3962 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
3963 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
3964 netlink_handle_5549(ndm, zif, ifp, &ip, true);
3965
3966 /* we send link layer information to client:
3967 * - nlmsg_type = RTM_DELNEIGH|NEWNEIGH|GETNEIGH
3968 * - struct ipaddr ( for DEL and GET)
3969 * - struct ethaddr mac; (for NEW)
3970 */
3971 if (h->nlmsg_type == RTM_NEWNEIGH)
3972 cmd = ZEBRA_NHRP_NEIGH_ADDED;
3973 else if (h->nlmsg_type == RTM_GETNEIGH)
3974 cmd = ZEBRA_NHRP_NEIGH_GET;
3975 else if (h->nlmsg_type == RTM_DELNEIGH)
3976 cmd = ZEBRA_NHRP_NEIGH_REMOVED;
3977 else {
3978 zlog_debug("%s(): unknown nlmsg type %u", __func__,
3979 h->nlmsg_type);
3980 return 0;
3981 }
3982 if (tb[NDA_LLADDR]) {
3983 /* copy LLADDR information */
3984 l2_len = RTA_PAYLOAD(tb[NDA_LLADDR]);
3985 }
3986 if (l2_len == IPV4_MAX_BYTELEN || l2_len == 0) {
3987 union sockunion link_layer_ipv4;
3988
3989 if (l2_len) {
3990 sockunion_family(&link_layer_ipv4) = AF_INET;
3991 memcpy((void *)sockunion_get_addr(&link_layer_ipv4),
3992 RTA_DATA(tb[NDA_LLADDR]), l2_len);
3993 } else
3994 sockunion_family(&link_layer_ipv4) = AF_UNSPEC;
3995 zsend_nhrp_neighbor_notify(
3996 cmd, ifp, &ip,
3997 netlink_nbr_entry_state_to_zclient(ndm->ndm_state),
3998 &link_layer_ipv4);
3999 }
4000
4001 if (h->nlmsg_type == RTM_GETNEIGH)
4002 return 0;
4003
4004 /* The neighbor is present on an SVI. From this, we locate the
4005 * underlying
4006 * bridge because we're only interested in neighbors on a VxLAN bridge.
4007 * The bridge is located based on the nature of the SVI:
4008 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
4009 * interface
4010 * and is linked to the bridge
4011 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
4012 * interface
4013 * itself
4014 */
4015 if (IS_ZEBRA_IF_VLAN(ifp)) {
4016 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
4017 zif->link_ifindex);
4018 if (!link_if)
4019 return 0;
4020 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
4021 link_if = ifp;
4022 else {
4023 link_if = NULL;
4024 if (IS_ZEBRA_DEBUG_KERNEL)
4025 zlog_debug(
4026 " Neighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
4027 }
4028
4029 memset(&mac, 0, sizeof(mac));
4030 if (h->nlmsg_type == RTM_NEWNEIGH) {
4031 if (tb[NDA_LLADDR]) {
4032 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
4033 if (IS_ZEBRA_DEBUG_KERNEL)
4034 zlog_debug(
4035 "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu",
4036 nl_msg_type_to_str(
4037 h->nlmsg_type),
4038 nl_family_to_str(
4039 ndm->ndm_family),
4040 ifp->name, ndm->ndm_ifindex,
4041 ifp->vrf->name,
4042 ifp->vrf->vrf_id,
4043 (unsigned long)RTA_PAYLOAD(
4044 tb[NDA_LLADDR]));
4045 return 0;
4046 }
4047
4048 mac_present = 1;
4049 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
4050 }
4051
4052 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
4053 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
4054
4055 if (tb[NDA_EXT_FLAGS]) {
4056 ext_flags = *(uint32_t *)RTA_DATA(tb[NDA_EXT_FLAGS]);
4057 if (ext_flags & NTF_E_MH_PEER_SYNC)
4058 dp_static = true;
4059 }
4060
4061 if (IS_ZEBRA_DEBUG_KERNEL)
4062 zlog_debug(
4063 "Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA MAC %s state 0x%x flags 0x%x ext_flags 0x%x",
4064 nl_msg_type_to_str(h->nlmsg_type),
4065 nl_family_to_str(ndm->ndm_family), ifp->name,
4066 ndm->ndm_ifindex, ifp->vrf->name,
4067 ifp->vrf->vrf_id, &ip,
4068 mac_present
4069 ? prefix_mac2str(&mac, buf, sizeof(buf))
4070 : "",
4071 ndm->ndm_state, ndm->ndm_flags, ext_flags);
4072
4073 /* If the neighbor state is valid for use, process as an add or
4074 * update
4075 * else process as a delete. Note that the delete handling may
4076 * result
4077 * in re-adding the neighbor if it is a valid "remote" neighbor.
4078 */
4079 if (ndm->ndm_state & NUD_VALID) {
4080 if (zebra_evpn_mh_do_adv_reachable_neigh_only())
4081 local_inactive =
4082 !(ndm->ndm_state & NUD_LOCAL_ACTIVE);
4083 else
4084 /* If EVPN-MH is not enabled we treat STALE
4085 * neighbors as locally-active and advertise
4086 * them
4087 */
4088 local_inactive = false;
4089
4090 /* Add local neighbors to the l3 interface database */
4091 if (is_ext)
4092 zebra_neigh_del(ifp, &ip);
4093 else
4094 zebra_neigh_add(ifp, &ip, &mac);
4095
4096 if (link_if)
4097 zebra_vxlan_handle_kernel_neigh_update(
4098 ifp, link_if, &ip, &mac, ndm->ndm_state,
4099 is_ext, is_router, local_inactive,
4100 dp_static);
4101 return 0;
4102 }
4103
4104
4105 zebra_neigh_del(ifp, &ip);
4106 if (link_if)
4107 zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
4108 return 0;
4109 }
4110
4111 if (IS_ZEBRA_DEBUG_KERNEL)
4112 zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA",
4113 nl_msg_type_to_str(h->nlmsg_type),
4114 nl_family_to_str(ndm->ndm_family), ifp->name,
4115 ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id,
4116 &ip);
4117
4118 /* Process the delete - it may result in re-adding the neighbor if it is
4119 * a valid "remote" neighbor.
4120 */
4121 zebra_neigh_del(ifp, &ip);
4122 if (link_if)
4123 zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
4124
4125 return 0;
4126 }
4127
4128 static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
4129 {
4130 int len;
4131 struct ndmsg *ndm;
4132
4133 if (h->nlmsg_type != RTM_NEWNEIGH)
4134 return 0;
4135
4136 /* Length validity. */
4137 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
4138 if (len < 0)
4139 return -1;
4140
4141 /* We are interested only in AF_INET or AF_INET6 notifications. */
4142 ndm = NLMSG_DATA(h);
4143 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
4144 return 0;
4145
4146 return netlink_neigh_change(h, len);
4147 }
4148
4149 /* Request for IP neighbor information from the kernel */
4150 static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
4151 int type, ifindex_t ifindex)
4152 {
4153 struct {
4154 struct nlmsghdr n;
4155 struct ndmsg ndm;
4156 char buf[256];
4157 } req;
4158
4159 /* Form the request, specifying filter (rtattr) if needed. */
4160 memset(&req, 0, sizeof(req));
4161 req.n.nlmsg_type = type;
4162 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
4163 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
4164 req.ndm.ndm_family = family;
4165 if (ifindex)
4166 nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
4167
4168 return netlink_request(netlink_cmd, &req);
4169 }
4170
4171 /*
4172 * IP Neighbor table read using netlink interface. This is invoked
4173 * at startup.
4174 */
4175 int netlink_neigh_read(struct zebra_ns *zns)
4176 {
4177 int ret;
4178 struct zebra_dplane_info dp_info;
4179
4180 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4181
4182 /* Get IP neighbor table. */
4183 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
4184 0);
4185 if (ret < 0)
4186 return ret;
4187 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4188 &dp_info, 0, true);
4189
4190 return ret;
4191 }
4192
4193 /*
4194 * IP Neighbor table read using netlink interface. This is for a specific
4195 * VLAN device.
4196 */
4197 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
4198 {
4199 int ret = 0;
4200 struct zebra_dplane_info dp_info;
4201
4202 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4203
4204 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
4205 vlan_if->ifindex);
4206 if (ret < 0)
4207 return ret;
4208 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4209 &dp_info, 0, false);
4210
4211 return ret;
4212 }
4213
4214 /*
4215 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
4216 * read using netlink interface.
4217 */
4218 static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
4219 int type,
4220 const struct ipaddr *ip,
4221 ifindex_t ifindex)
4222 {
4223 struct {
4224 struct nlmsghdr n;
4225 struct ndmsg ndm;
4226 char buf[256];
4227 } req;
4228 int ipa_len;
4229
4230 /* Form the request, specifying filter (rtattr) if needed. */
4231 memset(&req, 0, sizeof(req));
4232 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
4233 req.n.nlmsg_flags = NLM_F_REQUEST;
4234 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
4235 req.ndm.ndm_ifindex = ifindex;
4236
4237 if (IS_IPADDR_V4(ip)) {
4238 ipa_len = IPV4_MAX_BYTELEN;
4239 req.ndm.ndm_family = AF_INET;
4240
4241 } else {
4242 ipa_len = IPV6_MAX_BYTELEN;
4243 req.ndm.ndm_family = AF_INET6;
4244 }
4245
4246 nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
4247
4248 if (IS_ZEBRA_DEBUG_KERNEL)
4249 zlog_debug("%s: Tx %s family %s IF %u IP %pIA flags 0x%x",
4250 __func__, nl_msg_type_to_str(type),
4251 nl_family_to_str(req.ndm.ndm_family), ifindex, ip,
4252 req.n.nlmsg_flags);
4253
4254 return netlink_request(&zns->netlink_cmd, &req);
4255 }
4256
4257 int netlink_neigh_read_specific_ip(const struct ipaddr *ip,
4258 struct interface *vlan_if)
4259 {
4260 int ret = 0;
4261 struct zebra_ns *zns;
4262 struct zebra_vrf *zvrf = vlan_if->vrf->info;
4263 struct zebra_dplane_info dp_info;
4264
4265 zns = zvrf->zns;
4266
4267 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4268
4269 if (IS_ZEBRA_DEBUG_KERNEL)
4270 zlog_debug("%s: neigh request IF %s(%u) IP %pIA vrf %s(%u)",
4271 __func__, vlan_if->name, vlan_if->ifindex, ip,
4272 vlan_if->vrf->name, vlan_if->vrf->vrf_id);
4273
4274 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
4275 vlan_if->ifindex);
4276 if (ret < 0)
4277 return ret;
4278
4279 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4280 &dp_info, 1, false);
4281
4282 return ret;
4283 }
4284
4285 int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
4286 {
4287 int len;
4288 struct ndmsg *ndm;
4289
4290 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH
4291 || h->nlmsg_type == RTM_GETNEIGH))
4292 return 0;
4293
4294 /* Length validity. */
4295 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
4296 if (len < 0) {
4297 zlog_err(
4298 "%s: Message received from netlink is of a broken size %d %zu",
4299 __func__, h->nlmsg_len,
4300 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
4301 return -1;
4302 }
4303
4304 /* Is this a notification for the MAC FDB or IP neighbor table? */
4305 ndm = NLMSG_DATA(h);
4306 if (ndm->ndm_family == AF_BRIDGE)
4307 return netlink_macfdb_change(h, len, ns_id);
4308
4309 if (ndm->ndm_type != RTN_UNICAST)
4310 return 0;
4311
4312 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
4313 return netlink_ipneigh_change(h, len, ns_id);
4314 else {
4315 flog_warn(
4316 EC_ZEBRA_UNKNOWN_FAMILY,
4317 "Invalid address family: %u received from kernel neighbor change: %s",
4318 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
4319 return 0;
4320 }
4321
4322 return 0;
4323 }
4324
4325 /*
4326 * Utility neighbor-update function, using info from dplane context.
4327 */
4328 static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
4329 int cmd, void *buf, size_t buflen)
4330 {
4331 const struct ipaddr *ip;
4332 const struct ethaddr *mac = NULL;
4333 const struct ipaddr *link_ip = NULL;
4334 const void *link_ptr = NULL;
4335 char buf2[ETHER_ADDR_STRLEN];
4336
4337 int llalen;
4338 uint8_t flags;
4339 uint16_t state;
4340 uint8_t family;
4341 uint32_t update_flags;
4342 uint32_t ext_flags = 0;
4343 bool ext = false;
4344 int proto = RTPROT_ZEBRA;
4345
4346 if (dplane_ctx_get_type(ctx) != 0)
4347 proto = zebra2proto(dplane_ctx_get_type(ctx));
4348
4349 ip = dplane_ctx_neigh_get_ipaddr(ctx);
4350
4351 if (dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_INSTALL
4352 || dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_DELETE) {
4353 link_ip = dplane_ctx_neigh_get_link_ip(ctx);
4354 llalen = IPADDRSZ(link_ip);
4355 link_ptr = (const void *)&(link_ip->ip.addr);
4356 ipaddr2str(link_ip, buf2, sizeof(buf2));
4357 } else {
4358 mac = dplane_ctx_neigh_get_mac(ctx);
4359 llalen = ETH_ALEN;
4360 link_ptr = (const void *)mac;
4361 if (is_zero_mac(mac))
4362 mac = NULL;
4363 if (mac)
4364 prefix_mac2str(mac, buf2, sizeof(buf2));
4365 else
4366 snprintf(buf2, sizeof(buf2), "null");
4367 }
4368 update_flags = dplane_ctx_neigh_get_update_flags(ctx);
4369 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
4370 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
4371
4372 family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
4373
4374 if (update_flags & DPLANE_NEIGH_REMOTE) {
4375 flags |= NTF_EXT_LEARNED;
4376 /* if it was static-local previously we need to clear the
4377 * ext flags on replace with remote
4378 */
4379 if (update_flags & DPLANE_NEIGH_WAS_STATIC)
4380 ext = true;
4381 } else if (!(update_flags & DPLANE_NEIGH_NO_EXTENSION)) {
4382 ext = true;
4383 /* local neigh */
4384 if (update_flags & DPLANE_NEIGH_SET_STATIC)
4385 ext_flags |= NTF_E_MH_PEER_SYNC;
4386 }
4387 if (IS_ZEBRA_DEBUG_KERNEL)
4388 zlog_debug(
4389 "Tx %s family %s IF %s(%u) Neigh %pIA %s %s flags 0x%x state 0x%x %sext_flags 0x%x",
4390 nl_msg_type_to_str(cmd), nl_family_to_str(family),
4391 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
4392 ip, link_ip ? "Link" : "MAC", buf2, flags, state,
4393 ext ? "ext " : "", ext_flags);
4394
4395 return netlink_neigh_update_msg_encode(
4396 ctx, cmd, link_ptr, llalen, ip, true, family, RTN_UNICAST,
4397 flags, state, 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, ext,
4398 ext_flags, buf, buflen, proto);
4399 }
4400
4401 static int netlink_neigh_table_update_ctx(const struct zebra_dplane_ctx *ctx,
4402 void *data, size_t datalen)
4403 {
4404 struct {
4405 struct nlmsghdr n;
4406 struct ndtmsg ndtm;
4407 char buf[];
4408 } *req = data;
4409 struct rtattr *nest;
4410 uint8_t family;
4411 ifindex_t idx;
4412 uint32_t val;
4413
4414 if (datalen < sizeof(*req))
4415 return 0;
4416 memset(req, 0, sizeof(*req));
4417 family = dplane_ctx_neightable_get_family(ctx);
4418 idx = dplane_ctx_get_ifindex(ctx);
4419
4420 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg));
4421 req->n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE;
4422 req->n.nlmsg_type = RTM_SETNEIGHTBL;
4423 req->ndtm.ndtm_family = family;
4424
4425 nl_attr_put(&req->n, datalen, NDTA_NAME,
4426 family == AF_INET ? "arp_cache" : "ndisc_cache", 10);
4427 nest = nl_attr_nest(&req->n, datalen, NDTA_PARMS);
4428 if (nest == NULL)
4429 return 0;
4430 if (!nl_attr_put(&req->n, datalen, NDTPA_IFINDEX, &idx, sizeof(idx)))
4431 return 0;
4432 val = dplane_ctx_neightable_get_app_probes(ctx);
4433 if (!nl_attr_put(&req->n, datalen, NDTPA_APP_PROBES, &val, sizeof(val)))
4434 return 0;
4435 val = dplane_ctx_neightable_get_mcast_probes(ctx);
4436 if (!nl_attr_put(&req->n, datalen, NDTPA_MCAST_PROBES, &val,
4437 sizeof(val)))
4438 return 0;
4439 val = dplane_ctx_neightable_get_ucast_probes(ctx);
4440 if (!nl_attr_put(&req->n, datalen, NDTPA_UCAST_PROBES, &val,
4441 sizeof(val)))
4442 return 0;
4443 nl_attr_nest_end(&req->n, nest);
4444
4445 return NLMSG_ALIGN(req->n.nlmsg_len);
4446 }
4447
4448 static ssize_t netlink_neigh_msg_encoder(struct zebra_dplane_ctx *ctx,
4449 void *buf, size_t buflen)
4450 {
4451 ssize_t ret;
4452
4453 switch (dplane_ctx_get_op(ctx)) {
4454 case DPLANE_OP_NEIGH_INSTALL:
4455 case DPLANE_OP_NEIGH_UPDATE:
4456 case DPLANE_OP_NEIGH_DISCOVER:
4457 case DPLANE_OP_NEIGH_IP_INSTALL:
4458 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH, buf, buflen);
4459 break;
4460 case DPLANE_OP_NEIGH_DELETE:
4461 case DPLANE_OP_NEIGH_IP_DELETE:
4462 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH, buf, buflen);
4463 break;
4464 case DPLANE_OP_VTEP_ADD:
4465 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH, buf,
4466 buflen);
4467 break;
4468 case DPLANE_OP_VTEP_DELETE:
4469 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH, buf,
4470 buflen);
4471 break;
4472 case DPLANE_OP_NEIGH_TABLE_UPDATE:
4473 ret = netlink_neigh_table_update_ctx(ctx, buf, buflen);
4474 break;
4475 default:
4476 ret = -1;
4477 }
4478
4479 return ret;
4480 }
4481
4482 /*
4483 * Update MAC, using dataplane context object.
4484 */
4485
4486 enum netlink_msg_status netlink_put_mac_update_msg(struct nl_batch *bth,
4487 struct zebra_dplane_ctx *ctx)
4488 {
4489 return netlink_batch_add_msg(bth, ctx, netlink_macfdb_update_ctx,
4490 false);
4491 }
4492
4493 enum netlink_msg_status
4494 netlink_put_neigh_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
4495 {
4496 return netlink_batch_add_msg(bth, ctx, netlink_neigh_msg_encoder,
4497 false);
4498 }
4499
4500 /*
4501 * MPLS label forwarding table change via netlink interface, using dataplane
4502 * context information.
4503 */
4504 ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
4505 void *buf, size_t buflen)
4506 {
4507 mpls_lse_t lse;
4508 const struct nhlfe_list_head *head;
4509 const struct zebra_nhlfe *nhlfe;
4510 struct nexthop *nexthop = NULL;
4511 unsigned int nexthop_num;
4512 const char *routedesc;
4513 int route_type;
4514 struct prefix p = {0};
4515 struct nlsock *nl =
4516 kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
4517
4518 struct {
4519 struct nlmsghdr n;
4520 struct rtmsg r;
4521 char buf[0];
4522 } *req = buf;
4523
4524 if (buflen < sizeof(*req))
4525 return 0;
4526
4527 memset(req, 0, sizeof(*req));
4528
4529 /*
4530 * Count # nexthops so we can decide whether to use singlepath
4531 * or multipath case.
4532 */
4533 nexthop_num = 0;
4534 head = dplane_ctx_get_nhlfe_list(ctx);
4535 frr_each(nhlfe_list_const, head, nhlfe) {
4536 nexthop = nhlfe->nexthop;
4537 if (!nexthop)
4538 continue;
4539 if (cmd == RTM_NEWROUTE) {
4540 /* Count all selected NHLFEs */
4541 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4542 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
4543 nexthop_num++;
4544 } else { /* DEL */
4545 /* Count all installed NHLFEs */
4546 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
4547 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
4548 nexthop_num++;
4549 }
4550 }
4551
4552 if ((nexthop_num == 0) ||
4553 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
4554 return 0;
4555
4556 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
4557 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
4558 req->n.nlmsg_type = cmd;
4559 req->n.nlmsg_pid = nl->snl.nl_pid;
4560
4561 req->r.rtm_family = AF_MPLS;
4562 req->r.rtm_table = RT_TABLE_MAIN;
4563 req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
4564 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
4565 req->r.rtm_type = RTN_UNICAST;
4566
4567 if (cmd == RTM_NEWROUTE) {
4568 /* We do a replace to handle update. */
4569 req->n.nlmsg_flags |= NLM_F_REPLACE;
4570
4571 /* set the protocol value if installing */
4572 route_type = re_type_from_lsp_type(
4573 dplane_ctx_get_best_nhlfe(ctx)->type);
4574 req->r.rtm_protocol = zebra2proto(route_type);
4575 }
4576
4577 /* Fill destination */
4578 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
4579 if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t)))
4580 return 0;
4581
4582 /* Fill nexthops (paths) based on single-path or multipath. The paths
4583 * chosen depend on the operation.
4584 */
4585 if (nexthop_num == 1) {
4586 routedesc = "single-path";
4587 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4588 routedesc);
4589
4590 nexthop_num = 0;
4591 frr_each(nhlfe_list_const, head, nhlfe) {
4592 nexthop = nhlfe->nexthop;
4593 if (!nexthop)
4594 continue;
4595
4596 if ((cmd == RTM_NEWROUTE
4597 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4598 && CHECK_FLAG(nexthop->flags,
4599 NEXTHOP_FLAG_ACTIVE)))
4600 || (cmd == RTM_DELROUTE
4601 && (CHECK_FLAG(nhlfe->flags,
4602 NHLFE_FLAG_INSTALLED)
4603 && CHECK_FLAG(nexthop->flags,
4604 NEXTHOP_FLAG_FIB)))) {
4605 /* Add the gateway */
4606 if (!_netlink_mpls_build_singlepath(
4607 &p, routedesc, nhlfe, &req->n,
4608 &req->r, buflen, cmd))
4609 return false;
4610
4611 nexthop_num++;
4612 break;
4613 }
4614 }
4615 } else { /* Multipath case */
4616 struct rtattr *nest;
4617 const union g_addr *src1 = NULL;
4618
4619 nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH);
4620 if (!nest)
4621 return 0;
4622
4623 routedesc = "multipath";
4624 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4625 routedesc);
4626
4627 nexthop_num = 0;
4628 frr_each(nhlfe_list_const, head, nhlfe) {
4629 nexthop = nhlfe->nexthop;
4630 if (!nexthop)
4631 continue;
4632
4633 if ((cmd == RTM_NEWROUTE
4634 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4635 && CHECK_FLAG(nexthop->flags,
4636 NEXTHOP_FLAG_ACTIVE)))
4637 || (cmd == RTM_DELROUTE
4638 && (CHECK_FLAG(nhlfe->flags,
4639 NHLFE_FLAG_INSTALLED)
4640 && CHECK_FLAG(nexthop->flags,
4641 NEXTHOP_FLAG_FIB)))) {
4642 nexthop_num++;
4643
4644 /* Build the multipath */
4645 if (!_netlink_mpls_build_multipath(
4646 &p, routedesc, nhlfe, &req->n,
4647 buflen, &req->r, &src1))
4648 return 0;
4649 }
4650 }
4651
4652 /* Add the multipath */
4653 nl_attr_nest_end(&req->n, nest);
4654 }
4655
4656 return NLMSG_ALIGN(req->n.nlmsg_len);
4657 }
4658
4659 /****************************************************************************
4660 * This code was developed in a branch that didn't have dplane APIs for
4661 * MAC updates. Hence the use of the legacy style. It will be moved to
4662 * the new dplane style pre-merge to master. XXX
4663 */
4664 static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip)
4665 {
4666 struct {
4667 struct nlmsghdr n;
4668 struct nhmsg nhm;
4669 char buf[256];
4670 } req;
4671 int cmd = RTM_NEWNEXTHOP;
4672 struct zebra_vrf *zvrf;
4673 struct zebra_ns *zns;
4674
4675 zvrf = zebra_vrf_get_evpn();
4676 zns = zvrf->zns;
4677
4678 memset(&req, 0, sizeof(req));
4679
4680 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4681 req.n.nlmsg_flags = NLM_F_REQUEST;
4682 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4683 req.n.nlmsg_type = cmd;
4684 req.nhm.nh_family = AF_INET;
4685
4686 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4687 return -1;
4688 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4689 return -1;
4690 if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY,
4691 &vtep_ip, IPV4_MAX_BYTELEN))
4692 return -1;
4693
4694 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4695 zlog_debug("Tx %s fdb-nh 0x%x %pI4",
4696 nl_msg_type_to_str(cmd), nh_id, &vtep_ip);
4697 }
4698
4699 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4700 false);
4701 }
4702
4703 static int netlink_fdb_nh_del(uint32_t nh_id)
4704 {
4705 struct {
4706 struct nlmsghdr n;
4707 struct nhmsg nhm;
4708 char buf[256];
4709 } req;
4710 int cmd = RTM_DELNEXTHOP;
4711 struct zebra_vrf *zvrf;
4712 struct zebra_ns *zns;
4713
4714 zvrf = zebra_vrf_get_evpn();
4715 zns = zvrf->zns;
4716
4717 memset(&req, 0, sizeof(req));
4718
4719 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4720 req.n.nlmsg_flags = NLM_F_REQUEST;
4721 req.n.nlmsg_type = cmd;
4722 req.nhm.nh_family = AF_UNSPEC;
4723
4724 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4725 return -1;
4726
4727 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4728 zlog_debug("Tx %s fdb-nh 0x%x",
4729 nl_msg_type_to_str(cmd), nh_id);
4730 }
4731
4732 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4733 false);
4734 }
4735
4736 static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt,
4737 struct nh_grp *nh_ids)
4738 {
4739 struct {
4740 struct nlmsghdr n;
4741 struct nhmsg nhm;
4742 char buf[256];
4743 } req;
4744 int cmd = RTM_NEWNEXTHOP;
4745 struct zebra_vrf *zvrf;
4746 struct zebra_ns *zns;
4747 struct nexthop_grp grp[nh_cnt];
4748 uint32_t i;
4749
4750 zvrf = zebra_vrf_get_evpn();
4751 zns = zvrf->zns;
4752
4753 memset(&req, 0, sizeof(req));
4754
4755 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4756 req.n.nlmsg_flags = NLM_F_REQUEST;
4757 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4758 req.n.nlmsg_type = cmd;
4759 req.nhm.nh_family = AF_UNSPEC;
4760
4761 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id))
4762 return -1;
4763 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4764 return -1;
4765 memset(&grp, 0, sizeof(grp));
4766 for (i = 0; i < nh_cnt; ++i) {
4767 grp[i].id = nh_ids[i].id;
4768 grp[i].weight = nh_ids[i].weight;
4769 }
4770 if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP,
4771 grp, nh_cnt * sizeof(struct nexthop_grp)))
4772 return -1;
4773
4774
4775 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4776 char vtep_str[ES_VTEP_LIST_STR_SZ];
4777 char nh_buf[16];
4778
4779 vtep_str[0] = '\0';
4780 for (i = 0; i < nh_cnt; ++i) {
4781 snprintf(nh_buf, sizeof(nh_buf), "%u ",
4782 grp[i].id);
4783 strlcat(vtep_str, nh_buf, sizeof(vtep_str));
4784 }
4785
4786 zlog_debug("Tx %s fdb-nhg 0x%x %s",
4787 nl_msg_type_to_str(cmd), nhg_id, vtep_str);
4788 }
4789
4790 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4791 false);
4792 }
4793
/*
 * Delete the FDB nexthop group 'nhg_id'. Groups are removed with the same
 * request as single nexthops, so this simply forwards to
 * netlink_fdb_nh_del() and returns its result.
 */
static int netlink_fdb_nhg_del(uint32_t nhg_id)
{
	int rc = netlink_fdb_nh_del(nhg_id);

	return rc;
}
4798
/*
 * Install or update the FDB nexthop 'nh_id' with gateway 'vtep_ip'.
 * Returns the result of netlink_fdb_nh_update().
 */
int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
{
	int rc = netlink_fdb_nh_update(nh_id, vtep_ip);

	return rc;
}
4803
/*
 * Remove the FDB nexthop 'nh_id'.
 * Returns the result of netlink_fdb_nh_del().
 */
int kernel_del_mac_nh(uint32_t nh_id)
{
	int rc = netlink_fdb_nh_del(nh_id);

	return rc;
}
4808
/*
 * Install or update the FDB nexthop group 'nhg_id' made of the 'nh_cnt'
 * members listed in 'nh_ids'.
 * Returns the result of netlink_fdb_nhg_update().
 */
int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
		       struct nh_grp *nh_ids)
{
	int rc = netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids);

	return rc;
}
4814
/*
 * Remove the FDB nexthop group 'nhg_id'.
 * Returns the result of netlink_fdb_nhg_del().
 */
int kernel_del_mac_nhg(uint32_t nhg_id)
{
	int rc = netlink_fdb_nhg_del(nhg_id);

	return rc;
}
4819
4820 #endif /* HAVE_NETLINK */