]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
build, vtysh: extract vtysh commands from .xref
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 /* The following definition is to workaround an issue in the Linux kernel
26 * header files with redefinition of 'struct in6_addr' in both
27 * netinet/in.h and linux/in6.h.
28 * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html
29 */
30 #define _LINUX_IN6_H
31
32 #include <net/if_arp.h>
33 #include <linux/lwtunnel.h>
34 #include <linux/mpls_iptunnel.h>
35 #include <linux/seg6_iptunnel.h>
36 #include <linux/seg6_local.h>
37 #include <linux/neighbour.h>
38 #include <linux/rtnetlink.h>
39 #include <linux/nexthop.h>
40
41 /* Hack for GNU libc version 2. */
42 #ifndef MSG_TRUNC
43 #define MSG_TRUNC 0x20
44 #endif /* MSG_TRUNC */
45
46 #include "linklist.h"
47 #include "if.h"
48 #include "log.h"
49 #include "prefix.h"
50 #include "plist.h"
51 #include "plist_int.h"
52 #include "connected.h"
53 #include "table.h"
54 #include "memory.h"
55 #include "rib.h"
56 #include "thread.h"
57 #include "privs.h"
58 #include "nexthop.h"
59 #include "vrf.h"
60 #include "vty.h"
61 #include "mpls.h"
62 #include "vxlan.h"
63 #include "printfrr.h"
64
65 #include "zebra/zapi_msg.h"
66 #include "zebra/zebra_ns.h"
67 #include "zebra/zebra_vrf.h"
68 #include "zebra/rt.h"
69 #include "zebra/redistribute.h"
70 #include "zebra/interface.h"
71 #include "zebra/debug.h"
72 #include "zebra/rtadv.h"
73 #include "zebra/zebra_ptm.h"
74 #include "zebra/zebra_mpls.h"
75 #include "zebra/kernel_netlink.h"
76 #include "zebra/rt_netlink.h"
77 #include "zebra/zebra_nhg.h"
78 #include "zebra/zebra_mroute.h"
79 #include "zebra/zebra_vxlan.h"
80 #include "zebra/zebra_errors.h"
81 #include "zebra/zebra_evpn_mh.h"
82 #include "zebra/zebra_trace.h"
83 #include "zebra/zebra_neigh.h"
84
85 #ifndef AF_MPLS
86 #define AF_MPLS 28
87 #endif
88
89 /* Re-defining as I am unable to include <linux/if_bridge.h> which has the
90 * UAPI for MAC sync. */
91 #ifndef _UAPI_LINUX_IF_BRIDGE_H
92 #define BR_SPH_LIST_SIZE 10
93 #endif
94
95 static vlanid_t filter_vlan = 0;
96
97 /* We capture whether the current kernel supports nexthop ids; by
98 * default, we'll use them if possible. There's also a configuration
99 * available to _disable_ use of kernel nexthops.
100 */
101 static bool supports_nh;
102
103 struct gw_family_t {
104 uint16_t filler;
105 uint16_t family;
106 union g_addr gate;
107 };
108
109 static const char ipv4_ll_buf[16] = "169.254.0.1";
110 static struct in_addr ipv4_ll;
111
112 /* Is this a ipv4 over ipv6 route? */
113 static bool is_route_v4_over_v6(unsigned char rtm_family,
114 enum nexthop_types_t nexthop_type)
115 {
116 if (rtm_family == AF_INET
117 && (nexthop_type == NEXTHOP_TYPE_IPV6
118 || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX))
119 return true;
120
121 return false;
122 }
123
124 /* Helper to control use of kernel-level nexthop ids */
125 static bool kernel_nexthops_supported(void)
126 {
127 return (supports_nh && !vrf_is_backend_netns()
128 && zebra_nhg_kernel_nexthops_enabled());
129 }
130
/*
 * Some deployments only want nexthop groups created by protocols and
 * none created implicitly by zebra. This simply reports the answer given
 * by zebra_nhg_proto_nexthops_only().
 */
static bool proto_nexthops_only(void)
{
	const bool only_proto = zebra_nhg_proto_nexthops_only();

	return only_proto;
}
139
140 /* Is this a proto created NHG? */
141 static bool is_proto_nhg(uint32_t id, int type)
142 {
143 /* If type is available, use it as the source of truth */
144 if (type) {
145 if (type != ZEBRA_ROUTE_NHG)
146 return true;
147 return false;
148 }
149
150 if (id >= ZEBRA_NHG_PROTO_LOWER)
151 return true;
152
153 return false;
154 }
155
156 /*
157 * The ipv4_ll data structure is used for all 5549
158 * additions to the kernel. Let's figure out the
159 * correct value one time instead for every
160 * install/remove of a 5549 type route
161 */
162 void rt_netlink_init(void)
163 {
164 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
165 }
166
167 /*
168 * Mapping from dataplane neighbor flags to netlink flags
169 */
170 static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
171 {
172 uint8_t flags = 0;
173
174 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
175 flags |= NTF_EXT_LEARNED;
176 if (dplane_flags & DPLANE_NTF_ROUTER)
177 flags |= NTF_ROUTER;
178 if (dplane_flags & DPLANE_NTF_USE)
179 flags |= NTF_USE;
180
181 return flags;
182 }
183
184 /*
185 * Mapping from dataplane neighbor state to netlink state
186 */
187 static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
188 {
189 uint16_t state = 0;
190
191 if (dplane_state & DPLANE_NUD_REACHABLE)
192 state |= NUD_REACHABLE;
193 if (dplane_state & DPLANE_NUD_STALE)
194 state |= NUD_STALE;
195 if (dplane_state & DPLANE_NUD_NOARP)
196 state |= NUD_NOARP;
197 if (dplane_state & DPLANE_NUD_PROBE)
198 state |= NUD_PROBE;
199 if (dplane_state & DPLANE_NUD_INCOMPLETE)
200 state |= NUD_INCOMPLETE;
201 if (dplane_state & DPLANE_NUD_PERMANENT)
202 state |= NUD_PERMANENT;
203 if (dplane_state & DPLANE_NUD_FAILED)
204 state |= NUD_FAILED;
205
206 return state;
207 }
208
209
210 static inline bool is_selfroute(int proto)
211 {
212 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
213 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
214 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
215 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
216 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
217 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
218 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)
219 || (proto == RTPROT_SRTE)) {
220 return true;
221 }
222
223 return false;
224 }
225
226 static inline int zebra2proto(int proto)
227 {
228 switch (proto) {
229 case ZEBRA_ROUTE_BABEL:
230 proto = RTPROT_BABEL;
231 break;
232 case ZEBRA_ROUTE_BGP:
233 proto = RTPROT_BGP;
234 break;
235 case ZEBRA_ROUTE_OSPF:
236 case ZEBRA_ROUTE_OSPF6:
237 proto = RTPROT_OSPF;
238 break;
239 case ZEBRA_ROUTE_STATIC:
240 proto = RTPROT_ZSTATIC;
241 break;
242 case ZEBRA_ROUTE_ISIS:
243 proto = RTPROT_ISIS;
244 break;
245 case ZEBRA_ROUTE_RIP:
246 proto = RTPROT_RIP;
247 break;
248 case ZEBRA_ROUTE_RIPNG:
249 proto = RTPROT_RIPNG;
250 break;
251 case ZEBRA_ROUTE_NHRP:
252 proto = RTPROT_NHRP;
253 break;
254 case ZEBRA_ROUTE_EIGRP:
255 proto = RTPROT_EIGRP;
256 break;
257 case ZEBRA_ROUTE_LDP:
258 proto = RTPROT_LDP;
259 break;
260 case ZEBRA_ROUTE_SHARP:
261 proto = RTPROT_SHARP;
262 break;
263 case ZEBRA_ROUTE_PBR:
264 proto = RTPROT_PBR;
265 break;
266 case ZEBRA_ROUTE_OPENFABRIC:
267 proto = RTPROT_OPENFABRIC;
268 break;
269 case ZEBRA_ROUTE_SRTE:
270 proto = RTPROT_SRTE;
271 break;
272 case ZEBRA_ROUTE_TABLE:
273 case ZEBRA_ROUTE_NHG:
274 proto = RTPROT_ZEBRA;
275 break;
276 case ZEBRA_ROUTE_CONNECT:
277 case ZEBRA_ROUTE_KERNEL:
278 proto = RTPROT_KERNEL;
279 break;
280 default:
281 /*
282 * When a user adds a new protocol this will show up
283 * to let them know to do something about it. This
284 * is intentionally a warn because we should see
285 * this as part of development of a new protocol
286 */
287 zlog_debug(
288 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
289 __func__, proto);
290 proto = RTPROT_ZEBRA;
291 break;
292 }
293
294 return proto;
295 }
296
297 static inline int proto2zebra(int proto, int family, bool is_nexthop)
298 {
299 switch (proto) {
300 case RTPROT_BABEL:
301 proto = ZEBRA_ROUTE_BABEL;
302 break;
303 case RTPROT_BGP:
304 proto = ZEBRA_ROUTE_BGP;
305 break;
306 case RTPROT_OSPF:
307 proto = (family == AF_INET) ? ZEBRA_ROUTE_OSPF
308 : ZEBRA_ROUTE_OSPF6;
309 break;
310 case RTPROT_ISIS:
311 proto = ZEBRA_ROUTE_ISIS;
312 break;
313 case RTPROT_RIP:
314 proto = ZEBRA_ROUTE_RIP;
315 break;
316 case RTPROT_RIPNG:
317 proto = ZEBRA_ROUTE_RIPNG;
318 break;
319 case RTPROT_NHRP:
320 proto = ZEBRA_ROUTE_NHRP;
321 break;
322 case RTPROT_EIGRP:
323 proto = ZEBRA_ROUTE_EIGRP;
324 break;
325 case RTPROT_LDP:
326 proto = ZEBRA_ROUTE_LDP;
327 break;
328 case RTPROT_STATIC:
329 case RTPROT_ZSTATIC:
330 proto = ZEBRA_ROUTE_STATIC;
331 break;
332 case RTPROT_SHARP:
333 proto = ZEBRA_ROUTE_SHARP;
334 break;
335 case RTPROT_PBR:
336 proto = ZEBRA_ROUTE_PBR;
337 break;
338 case RTPROT_OPENFABRIC:
339 proto = ZEBRA_ROUTE_OPENFABRIC;
340 break;
341 case RTPROT_SRTE:
342 proto = ZEBRA_ROUTE_SRTE;
343 break;
344 case RTPROT_UNSPEC:
345 case RTPROT_REDIRECT:
346 case RTPROT_KERNEL:
347 case RTPROT_BOOT:
348 case RTPROT_GATED:
349 case RTPROT_RA:
350 case RTPROT_MRT:
351 case RTPROT_BIRD:
352 case RTPROT_DNROUTED:
353 case RTPROT_XORP:
354 case RTPROT_NTK:
355 case RTPROT_MROUTED:
356 case RTPROT_KEEPALIVED:
357 case RTPROT_OPENR:
358 proto = ZEBRA_ROUTE_KERNEL;
359 break;
360 case RTPROT_ZEBRA:
361 if (is_nexthop) {
362 proto = ZEBRA_ROUTE_NHG;
363 break;
364 }
365 /* Intentional fall thru */
366 default:
367 /*
368 * When a user adds a new protocol this will show up
369 * to let them know to do something about it. This
370 * is intentionally a warn because we should see
371 * this as part of development of a new protocol
372 */
373 zlog_debug(
374 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
375 __func__, proto);
376 proto = ZEBRA_ROUTE_KERNEL;
377 break;
378 }
379 return proto;
380 }
381
382 /*
383 Pending: create an efficient table_id (in a tree/hash) based lookup)
384 */
385 vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
386 {
387 struct vrf *vrf;
388 struct zebra_vrf *zvrf;
389
390 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
391 zvrf = vrf->info;
392 if (zvrf == NULL)
393 continue;
394 /* case vrf with netns : match the netnsid */
395 if (vrf_is_backend_netns()) {
396 if (ns_id == zvrf_id(zvrf))
397 return zvrf_id(zvrf);
398 } else {
399 /* VRF is VRF_BACKEND_VRF_LITE */
400 if (zvrf->table_id != table_id)
401 continue;
402 return zvrf_id(zvrf);
403 }
404 }
405
406 return VRF_DEFAULT;
407 }
408
409 /**
410 * @parse_encap_mpls() - Parses encapsulated mpls attributes
411 * @tb: Pointer to rtattr to look for nested items in.
412 * @labels: Pointer to store labels in.
413 *
414 * Return: Number of mpls labels found.
415 */
416 static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
417 {
418 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
419 mpls_lse_t *lses = NULL;
420 int num_labels = 0;
421 uint32_t ttl = 0;
422 uint32_t bos = 0;
423 uint32_t exp = 0;
424 mpls_label_t label = 0;
425
426 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
427 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
428 while (!bos && num_labels < MPLS_MAX_LABELS) {
429 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
430 labels[num_labels++] = label;
431 }
432
433 return num_labels;
434 }
435
436 static enum seg6local_action_t
437 parse_encap_seg6local(struct rtattr *tb,
438 struct seg6local_context *ctx)
439 {
440 struct rtattr *tb_encap[SEG6_LOCAL_MAX + 1] = {};
441 enum seg6local_action_t act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
442
443 netlink_parse_rtattr_nested(tb_encap, SEG6_LOCAL_MAX, tb);
444
445 if (tb_encap[SEG6_LOCAL_ACTION])
446 act = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_ACTION]);
447
448 if (tb_encap[SEG6_LOCAL_NH4])
449 ctx->nh4 = *(struct in_addr *)RTA_DATA(
450 tb_encap[SEG6_LOCAL_NH4]);
451
452 if (tb_encap[SEG6_LOCAL_NH6])
453 ctx->nh6 = *(struct in6_addr *)RTA_DATA(
454 tb_encap[SEG6_LOCAL_NH6]);
455
456 if (tb_encap[SEG6_LOCAL_TABLE])
457 ctx->table = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_TABLE]);
458
459 if (tb_encap[SEG6_LOCAL_VRFTABLE])
460 ctx->table =
461 *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_VRFTABLE]);
462
463 return act;
464 }
465
/*
 * Parse a nested seg6 encapsulation and copy its first segment into
 * *segs.
 *
 * Returns 1 when a SEG6_IPTUNNEL_SRH attribute was found (and *segs was
 * written), 0 otherwise.
 *
 * TODO: multiple SID lists are not supported; only segment 0 is read.
 */
static int parse_encap_seg6(struct rtattr *tb, struct in6_addr *segs)
{
	struct rtattr *attrs[SEG6_IPTUNNEL_MAX + 1] = {};
	struct seg6_iptunnel_encap *encap;

	netlink_parse_rtattr_nested(attrs, SEG6_IPTUNNEL_MAX, tb);

	if (!attrs[SEG6_IPTUNNEL_SRH])
		return 0;

	encap = (struct seg6_iptunnel_encap *)RTA_DATA(
		attrs[SEG6_IPTUNNEL_SRH]);
	*segs = encap->srh[0].segments[0];

	return 1;
}
487
488
489 static struct nexthop
490 parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
491 enum blackhole_type bh_type, int index, void *prefsrc,
492 void *gate, afi_t afi, vrf_id_t vrf_id)
493 {
494 struct interface *ifp = NULL;
495 struct nexthop nh = {0};
496 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
497 int num_labels = 0;
498 enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
499 struct seg6local_context seg6l_ctx = {};
500 struct in6_addr seg6_segs = {};
501 int num_segs = 0;
502
503 vrf_id_t nh_vrf_id = vrf_id;
504 size_t sz = (afi == AFI_IP) ? 4 : 16;
505
506 if (bh_type == BLACKHOLE_UNSPEC) {
507 if (index && !gate)
508 nh.type = NEXTHOP_TYPE_IFINDEX;
509 else if (index && gate)
510 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
511 : NEXTHOP_TYPE_IPV6_IFINDEX;
512 else if (!index && gate)
513 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
514 : NEXTHOP_TYPE_IPV6;
515 else {
516 nh.type = NEXTHOP_TYPE_BLACKHOLE;
517 nh.bh_type = bh_type;
518 }
519 } else {
520 nh.type = NEXTHOP_TYPE_BLACKHOLE;
521 nh.bh_type = bh_type;
522 }
523 nh.ifindex = index;
524 if (prefsrc)
525 memcpy(&nh.src, prefsrc, sz);
526 if (gate)
527 memcpy(&nh.gate, gate, sz);
528
529 if (index) {
530 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
531 if (ifp)
532 nh_vrf_id = ifp->vrf->vrf_id;
533 }
534 nh.vrf_id = nh_vrf_id;
535
536 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
537 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
538 == LWTUNNEL_ENCAP_MPLS) {
539 num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
540 }
541 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
542 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
543 == LWTUNNEL_ENCAP_SEG6_LOCAL) {
544 seg6l_act = parse_encap_seg6local(tb[RTA_ENCAP], &seg6l_ctx);
545 }
546 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
547 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
548 == LWTUNNEL_ENCAP_SEG6) {
549 num_segs = parse_encap_seg6(tb[RTA_ENCAP], &seg6_segs);
550 }
551
552 if (rtm->rtm_flags & RTNH_F_ONLINK)
553 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
554
555 if (rtm->rtm_flags & RTNH_F_LINKDOWN)
556 SET_FLAG(nh.flags, NEXTHOP_FLAG_LINKDOWN);
557
558 if (num_labels)
559 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
560
561 if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
562 nexthop_add_srv6_seg6local(&nh, seg6l_act, &seg6l_ctx);
563
564 if (num_segs)
565 nexthop_add_srv6_seg6(&nh, &seg6_segs);
566
567 return nh;
568 }
569
570 static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
571 struct nexthop_group *ng,
572 struct rtmsg *rtm,
573 struct rtnexthop *rtnh,
574 struct rtattr **tb,
575 void *prefsrc, vrf_id_t vrf_id)
576 {
577 void *gate = NULL;
578 struct interface *ifp = NULL;
579 int index = 0;
580 /* MPLS labels */
581 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
582 int num_labels = 0;
583 enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
584 struct seg6local_context seg6l_ctx = {};
585 struct in6_addr seg6_segs = {};
586 int num_segs = 0;
587 struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
588
589 int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
590 vrf_id_t nh_vrf_id = vrf_id;
591
592 for (;;) {
593 struct nexthop *nh = NULL;
594
595 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
596 break;
597
598 index = rtnh->rtnh_ifindex;
599 if (index) {
600 /*
601 * Yes we are looking this up
602 * for every nexthop and just
603 * using the last one looked
604 * up right now
605 */
606 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
607 index);
608 if (ifp)
609 nh_vrf_id = ifp->vrf->vrf_id;
610 else {
611 flog_warn(
612 EC_ZEBRA_UNKNOWN_INTERFACE,
613 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
614 __func__, index);
615 nh_vrf_id = VRF_DEFAULT;
616 }
617 } else
618 nh_vrf_id = vrf_id;
619
620 if (rtnh->rtnh_len > sizeof(*rtnh)) {
621 netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
622 rtnh->rtnh_len - sizeof(*rtnh));
623 if (rtnh_tb[RTA_GATEWAY])
624 gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
625 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
626 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
627 == LWTUNNEL_ENCAP_MPLS) {
628 num_labels = parse_encap_mpls(
629 rtnh_tb[RTA_ENCAP], labels);
630 }
631 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
632 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
633 == LWTUNNEL_ENCAP_SEG6_LOCAL) {
634 seg6l_act = parse_encap_seg6local(
635 rtnh_tb[RTA_ENCAP], &seg6l_ctx);
636 }
637 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
638 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
639 == LWTUNNEL_ENCAP_SEG6) {
640 num_segs = parse_encap_seg6(rtnh_tb[RTA_ENCAP],
641 &seg6_segs);
642 }
643 }
644
645 if (gate && rtm->rtm_family == AF_INET) {
646 if (index)
647 nh = nexthop_from_ipv4_ifindex(
648 gate, prefsrc, index, nh_vrf_id);
649 else
650 nh = nexthop_from_ipv4(gate, prefsrc,
651 nh_vrf_id);
652 } else if (gate && rtm->rtm_family == AF_INET6) {
653 if (index)
654 nh = nexthop_from_ipv6_ifindex(
655 gate, index, nh_vrf_id);
656 else
657 nh = nexthop_from_ipv6(gate, nh_vrf_id);
658 } else
659 nh = nexthop_from_ifindex(index, nh_vrf_id);
660
661 if (nh) {
662 nh->weight = rtnh->rtnh_hops + 1;
663
664 if (num_labels)
665 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
666 num_labels, labels);
667
668 if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
669 nexthop_add_srv6_seg6local(nh, seg6l_act,
670 &seg6l_ctx);
671
672 if (num_segs)
673 nexthop_add_srv6_seg6(nh, &seg6_segs);
674
675 if (rtnh->rtnh_flags & RTNH_F_ONLINK)
676 SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);
677
678 /* Add to temporary list */
679 nexthop_group_add_sorted(ng, nh);
680 }
681
682 if (rtnh->rtnh_len == 0)
683 break;
684
685 len -= NLMSG_ALIGN(rtnh->rtnh_len);
686 rtnh = RTNH_NEXT(rtnh);
687 }
688
689 uint8_t nhop_num = nexthop_group_nexthop_num(ng);
690
691 return nhop_num;
692 }
693
694 /* Looking up routing table by netlink interface. */
695 static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
696 int startup)
697 {
698 int len;
699 struct rtmsg *rtm;
700 struct rtattr *tb[RTA_MAX + 1];
701 uint32_t flags = 0;
702 struct prefix p;
703 struct prefix_ipv6 src_p = {};
704 vrf_id_t vrf_id;
705 bool selfroute;
706
707 char anyaddr[16] = {0};
708
709 int proto = ZEBRA_ROUTE_KERNEL;
710 int index = 0;
711 int table;
712 int metric = 0;
713 uint32_t mtu = 0;
714 uint8_t distance = 0;
715 route_tag_t tag = 0;
716 uint32_t nhe_id = 0;
717
718 void *dest = NULL;
719 void *gate = NULL;
720 void *prefsrc = NULL; /* IPv4 preferred source host address */
721 void *src = NULL; /* IPv6 srcdest source prefix */
722 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
723
724 frrtrace(3, frr_zebra, netlink_route_change_read_unicast, h, ns_id,
725 startup);
726
727 rtm = NLMSG_DATA(h);
728
729 if (startup && h->nlmsg_type != RTM_NEWROUTE)
730 return 0;
731 switch (rtm->rtm_type) {
732 case RTN_UNICAST:
733 break;
734 case RTN_BLACKHOLE:
735 bh_type = BLACKHOLE_NULL;
736 break;
737 case RTN_UNREACHABLE:
738 bh_type = BLACKHOLE_REJECT;
739 break;
740 case RTN_PROHIBIT:
741 bh_type = BLACKHOLE_ADMINPROHIB;
742 break;
743 default:
744 if (IS_ZEBRA_DEBUG_KERNEL)
745 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
746 nl_rttype_to_str(rtm->rtm_type),
747 rtm->rtm_type);
748 return 0;
749 }
750
751 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
752 if (len < 0) {
753 zlog_err(
754 "%s: Message received from netlink is of a broken size %d %zu",
755 __func__, h->nlmsg_len,
756 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
757 return -1;
758 }
759
760 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
761
762 if (rtm->rtm_flags & RTM_F_CLONED)
763 return 0;
764 if (rtm->rtm_protocol == RTPROT_REDIRECT)
765 return 0;
766 if (rtm->rtm_protocol == RTPROT_KERNEL)
767 return 0;
768
769 selfroute = is_selfroute(rtm->rtm_protocol);
770
771 if (!startup && selfroute
772 && h->nlmsg_type == RTM_NEWROUTE
773 && !zrouter.asic_offloaded) {
774 if (IS_ZEBRA_DEBUG_KERNEL)
775 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
776 rtm->rtm_protocol);
777 return 0;
778 }
779
780 /* We don't care about change notifications for the MPLS table. */
781 /* TODO: Revisit this. */
782 if (rtm->rtm_family == AF_MPLS)
783 return 0;
784
785 /* Table corresponding to route. */
786 if (tb[RTA_TABLE])
787 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
788 else
789 table = rtm->rtm_table;
790
791 /* Map to VRF */
792 vrf_id = vrf_lookup_by_table(table, ns_id);
793 if (vrf_id == VRF_DEFAULT) {
794 if (!is_zebra_valid_kernel_table(table)
795 && !is_zebra_main_routing_table(table))
796 return 0;
797 }
798
799 if (rtm->rtm_flags & RTM_F_TRAP)
800 flags |= ZEBRA_FLAG_TRAPPED;
801 if (rtm->rtm_flags & RTM_F_OFFLOAD)
802 flags |= ZEBRA_FLAG_OFFLOADED;
803 if (rtm->rtm_flags & RTM_F_OFFLOAD_FAILED)
804 flags |= ZEBRA_FLAG_OFFLOAD_FAILED;
805
806 /* Route which inserted by Zebra. */
807 if (selfroute) {
808 flags |= ZEBRA_FLAG_SELFROUTE;
809 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
810 }
811 if (tb[RTA_OIF])
812 index = *(int *)RTA_DATA(tb[RTA_OIF]);
813
814 if (tb[RTA_DST])
815 dest = RTA_DATA(tb[RTA_DST]);
816 else
817 dest = anyaddr;
818
819 if (tb[RTA_SRC])
820 src = RTA_DATA(tb[RTA_SRC]);
821 else
822 src = anyaddr;
823
824 if (tb[RTA_PREFSRC])
825 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
826
827 if (tb[RTA_GATEWAY])
828 gate = RTA_DATA(tb[RTA_GATEWAY]);
829
830 if (tb[RTA_NH_ID])
831 nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
832
833 if (tb[RTA_PRIORITY])
834 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
835
836 #if defined(SUPPORT_REALMS)
837 if (tb[RTA_FLOW])
838 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
839 #endif
840
841 if (tb[RTA_METRICS]) {
842 struct rtattr *mxrta[RTAX_MAX + 1];
843
844 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
845 RTA_PAYLOAD(tb[RTA_METRICS]));
846
847 if (mxrta[RTAX_MTU])
848 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
849 }
850
851 if (rtm->rtm_family == AF_INET) {
852 p.family = AF_INET;
853 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
854 zlog_err(
855 "Invalid destination prefix length: %u received from kernel route change",
856 rtm->rtm_dst_len);
857 return -1;
858 }
859 memcpy(&p.u.prefix4, dest, 4);
860 p.prefixlen = rtm->rtm_dst_len;
861
862 if (rtm->rtm_src_len != 0) {
863 flog_warn(
864 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
865 "unsupported IPv4 sourcedest route (dest %pFX vrf %u)",
866 &p, vrf_id);
867 return 0;
868 }
869
870 /* Force debug below to not display anything for source */
871 src_p.prefixlen = 0;
872 } else if (rtm->rtm_family == AF_INET6) {
873 p.family = AF_INET6;
874 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
875 zlog_err(
876 "Invalid destination prefix length: %u received from kernel route change",
877 rtm->rtm_dst_len);
878 return -1;
879 }
880 memcpy(&p.u.prefix6, dest, 16);
881 p.prefixlen = rtm->rtm_dst_len;
882
883 src_p.family = AF_INET6;
884 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
885 zlog_err(
886 "Invalid source prefix length: %u received from kernel route change",
887 rtm->rtm_src_len);
888 return -1;
889 }
890 memcpy(&src_p.prefix, src, 16);
891 src_p.prefixlen = rtm->rtm_src_len;
892 } else {
893 /* We only handle the AFs we handle... */
894 if (IS_ZEBRA_DEBUG_KERNEL)
895 zlog_debug("%s: unknown address-family %u", __func__,
896 rtm->rtm_family);
897 return 0;
898 }
899
900 /*
901 * For ZEBRA_ROUTE_KERNEL types:
902 *
903 * The metric/priority of the route received from the kernel
904 * is a 32 bit number. We are going to interpret the high
905 * order byte as the Admin Distance and the low order 3 bytes
906 * as the metric.
907 *
908 * This will allow us to do two things:
909 * 1) Allow the creation of kernel routes that can be
910 * overridden by zebra.
911 * 2) Allow the old behavior for 'most' kernel route types
912 * if a user enters 'ip route ...' v4 routes get a metric
913 * of 0 and v6 routes get a metric of 1024. Both of these
914 * values will end up with a admin distance of 0, which
915 * will cause them to win for the purposes of zebra.
916 */
917 if (proto == ZEBRA_ROUTE_KERNEL) {
918 distance = (metric >> 24) & 0xFF;
919 metric = (metric & 0x00FFFFFF);
920 }
921
922 if (IS_ZEBRA_DEBUG_KERNEL) {
923 char buf2[PREFIX_STRLEN];
924
925 zlog_debug(
926 "%s %pFX%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d",
927 nl_msg_type_to_str(h->nlmsg_type), &p,
928 src_p.prefixlen ? " from " : "",
929 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
930 : "",
931 vrf_id_to_name(vrf_id), vrf_id, table, metric,
932 distance);
933 }
934
935 afi_t afi = AFI_IP;
936 if (rtm->rtm_family == AF_INET6)
937 afi = AFI_IP6;
938
939 if (h->nlmsg_type == RTM_NEWROUTE) {
940 struct route_entry *re;
941 struct nexthop_group *ng = NULL;
942
943 re = zebra_rib_route_entry_new(vrf_id, proto, 0, flags, nhe_id,
944 table, metric, mtu, distance,
945 tag);
946 if (!nhe_id)
947 ng = nexthop_group_new();
948
949 if (!tb[RTA_MULTIPATH]) {
950 struct nexthop *nexthop, nh;
951
952 if (!nhe_id) {
953 nh = parse_nexthop_unicast(
954 ns_id, rtm, tb, bh_type, index, prefsrc,
955 gate, afi, vrf_id);
956
957 nexthop = nexthop_new();
958 *nexthop = nh;
959 nexthop_group_add_sorted(ng, nexthop);
960 }
961 } else {
962 /* This is a multipath route */
963 struct rtnexthop *rtnh =
964 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
965
966 if (!nhe_id) {
967 uint8_t nhop_num;
968
969 /* Use temporary list of nexthops; parse
970 * message payload's nexthops.
971 */
972 nhop_num =
973 parse_multipath_nexthops_unicast(
974 ns_id, ng, rtm, rtnh, tb,
975 prefsrc, vrf_id);
976
977 zserv_nexthop_num_warn(
978 __func__, (const struct prefix *)&p,
979 nhop_num);
980
981 if (nhop_num == 0) {
982 nexthop_group_delete(&ng);
983 ng = NULL;
984 }
985 }
986 }
987 if (nhe_id || ng)
988 rib_add_multipath(afi, SAFI_UNICAST, &p, &src_p, re, ng,
989 startup);
990 else {
991 /*
992 * I really don't see how this is possible
993 * but since we are testing for it let's
994 * let the end user know why the route
995 * that was just received was swallowed
996 * up and forgotten
997 */
998 zlog_err(
999 "%s: %pFX multipath RTM_NEWROUTE has a invalid nexthop group from the kernel",
1000 __func__, &p);
1001 XFREE(MTYPE_RE, re);
1002 }
1003 } else {
1004 if (nhe_id) {
1005 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
1006 &p, &src_p, NULL, nhe_id, table, metric,
1007 distance, true);
1008 } else {
1009 if (!tb[RTA_MULTIPATH]) {
1010 struct nexthop nh;
1011
1012 nh = parse_nexthop_unicast(
1013 ns_id, rtm, tb, bh_type, index, prefsrc,
1014 gate, afi, vrf_id);
1015 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
1016 flags, &p, &src_p, &nh, 0, table,
1017 metric, distance, true);
1018 } else {
1019 /* XXX: need to compare the entire list of
1020 * nexthops here for NLM_F_APPEND stupidity */
1021 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
1022 flags, &p, &src_p, NULL, 0, table,
1023 metric, distance, true);
1024 }
1025 }
1026 }
1027
1028 return 0;
1029 }
1030
/*
 * Destination for the multicast route data filled in by
 * netlink_route_change_read_multicast(); static storage, so it starts
 * out NULL and must be set before that function runs.
 */
static struct mcast_route_data *mroute;
1032
1033 static int netlink_route_change_read_multicast(struct nlmsghdr *h,
1034 ns_id_t ns_id, int startup)
1035 {
1036 int len;
1037 struct rtmsg *rtm;
1038 struct rtattr *tb[RTA_MAX + 1];
1039 struct mcast_route_data *m;
1040 int iif = 0;
1041 int count;
1042 int oif[256];
1043 int oif_count = 0;
1044 char oif_list[256] = "\0";
1045 vrf_id_t vrf;
1046 int table;
1047
1048 assert(mroute);
1049 m = mroute;
1050
1051 rtm = NLMSG_DATA(h);
1052
1053 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
1054
1055 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
1056
1057 if (tb[RTA_TABLE])
1058 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
1059 else
1060 table = rtm->rtm_table;
1061
1062 vrf = vrf_lookup_by_table(table, ns_id);
1063
1064 if (tb[RTA_IIF])
1065 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
1066
1067 if (tb[RTA_SRC]) {
1068 if (rtm->rtm_family == RTNL_FAMILY_IPMR)
1069 m->src.ipaddr_v4 =
1070 *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
1071 else
1072 m->src.ipaddr_v6 =
1073 *(struct in6_addr *)RTA_DATA(tb[RTA_SRC]);
1074 }
1075
1076 if (tb[RTA_DST]) {
1077 if (rtm->rtm_family == RTNL_FAMILY_IPMR)
1078 m->grp.ipaddr_v4 =
1079 *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
1080 else
1081 m->grp.ipaddr_v6 =
1082 *(struct in6_addr *)RTA_DATA(tb[RTA_DST]);
1083 }
1084
1085 if (tb[RTA_EXPIRES])
1086 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
1087
1088 if (tb[RTA_MULTIPATH]) {
1089 struct rtnexthop *rtnh =
1090 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
1091
1092 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
1093 for (;;) {
1094 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
1095 break;
1096
1097 oif[oif_count] = rtnh->rtnh_ifindex;
1098 oif_count++;
1099
1100 if (rtnh->rtnh_len == 0)
1101 break;
1102
1103 len -= NLMSG_ALIGN(rtnh->rtnh_len);
1104 rtnh = RTNH_NEXT(rtnh);
1105 }
1106 }
1107
1108 if (rtm->rtm_family == RTNL_FAMILY_IPMR) {
1109 SET_IPADDR_V4(&m->src);
1110 SET_IPADDR_V4(&m->grp);
1111 } else if (rtm->rtm_family == RTNL_FAMILY_IP6MR) {
1112 SET_IPADDR_V6(&m->src);
1113 SET_IPADDR_V6(&m->grp);
1114 } else {
1115 zlog_warn("%s: Invalid rtm_family received", __func__);
1116 return 0;
1117 }
1118
1119 if (IS_ZEBRA_DEBUG_KERNEL) {
1120 struct interface *ifp = NULL;
1121 struct zebra_vrf *zvrf = NULL;
1122
1123 for (count = 0; count < oif_count; count++) {
1124 ifp = if_lookup_by_index(oif[count], vrf);
1125 char temp[256];
1126
1127 snprintf(temp, sizeof(temp), "%s(%d) ",
1128 ifp ? ifp->name : "Unknown", oif[count]);
1129 strlcat(oif_list, temp, sizeof(oif_list));
1130 }
1131 zvrf = zebra_vrf_lookup_by_id(vrf);
1132 ifp = if_lookup_by_index(iif, vrf);
1133 zlog_debug(
1134 "MCAST VRF: %s(%d) %s (%pIA,%pIA) IIF: %s(%d) OIF: %s jiffies: %lld",
1135 zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type),
1136 &m->src, &m->grp, ifp ? ifp->name : "Unknown", iif,
1137 oif_list, m->lastused);
1138 }
1139 return 0;
1140 }
1141
1142 int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
1143 {
1144 int len;
1145 struct rtmsg *rtm;
1146
1147 rtm = NLMSG_DATA(h);
1148
1149 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
1150 /* If this is not route add/delete message print warning. */
1151 zlog_debug("Kernel message: %s NS %u",
1152 nl_msg_type_to_str(h->nlmsg_type), ns_id);
1153 return 0;
1154 }
1155
1156 switch (rtm->rtm_family) {
1157 case AF_INET:
1158 case AF_INET6:
1159 break;
1160
1161 case RTNL_FAMILY_IPMR:
1162 case RTNL_FAMILY_IP6MR:
1163 /* notifications on IPMR are irrelevant to zebra, we only care
1164 * about responses to RTM_GETROUTE requests we sent.
1165 */
1166 return 0;
1167
1168 default:
1169 flog_warn(
1170 EC_ZEBRA_UNKNOWN_FAMILY,
1171 "Invalid address family: %u received from kernel route change: %s",
1172 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
1173 return 0;
1174 }
1175
1176 /* Connected route. */
1177 if (IS_ZEBRA_DEBUG_KERNEL)
1178 zlog_debug("%s %s %s proto %s NS %u",
1179 nl_msg_type_to_str(h->nlmsg_type),
1180 nl_family_to_str(rtm->rtm_family),
1181 nl_rttype_to_str(rtm->rtm_type),
1182 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
1183
1184
1185 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
1186 if (len < 0) {
1187 zlog_err(
1188 "%s: Message received from netlink is of a broken size: %d %zu",
1189 __func__, h->nlmsg_len,
1190 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
1191 return -1;
1192 }
1193
1194 /* these are "magic" kernel-managed *unicast* routes used for
1195 * outputting locally generated multicast traffic (which uses unicast
1196 * handling on Linux because ~reasons~.
1197 */
1198 if (rtm->rtm_type == RTN_MULTICAST)
1199 return 0;
1200
1201 netlink_route_change_read_unicast(h, ns_id, startup);
1202 return 0;
1203 }
1204
1205 /* Request for specific route information from the kernel */
1206 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
1207 {
1208 struct {
1209 struct nlmsghdr n;
1210 struct rtmsg rtm;
1211 } req;
1212
1213 /* Form the request, specifying filter (rtattr) if needed. */
1214 memset(&req, 0, sizeof(req));
1215 req.n.nlmsg_type = type;
1216 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
1217 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1218 req.rtm.rtm_family = family;
1219
1220 return netlink_request(&zns->netlink_cmd, &req);
1221 }
1222
1223 /* Routing table read function using netlink interface. Only called
1224 bootstrap time. */
1225 int netlink_route_read(struct zebra_ns *zns)
1226 {
1227 int ret;
1228 struct zebra_dplane_info dp_info;
1229
1230 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
1231
1232 /* Get IPv4 routing table. */
1233 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
1234 if (ret < 0)
1235 return ret;
1236 ret = netlink_parse_info(netlink_route_change_read_unicast,
1237 &zns->netlink_cmd, &dp_info, 0, true);
1238 if (ret < 0)
1239 return ret;
1240
1241 /* Get IPv6 routing table. */
1242 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
1243 if (ret < 0)
1244 return ret;
1245 ret = netlink_parse_info(netlink_route_change_read_unicast,
1246 &zns->netlink_cmd, &dp_info, 0, true);
1247 if (ret < 0)
1248 return ret;
1249
1250 return 0;
1251 }
1252
1253 /*
1254 * The function returns true if the gateway info could be added
1255 * to the message, otherwise false is returned.
1256 */
1257 static bool _netlink_route_add_gateway_info(uint8_t route_family,
1258 uint8_t gw_family,
1259 struct nlmsghdr *nlmsg,
1260 size_t req_size, int bytelen,
1261 const struct nexthop *nexthop)
1262 {
1263 if (route_family == AF_MPLS) {
1264 struct gw_family_t gw_fam;
1265
1266 gw_fam.family = gw_family;
1267 if (gw_family == AF_INET)
1268 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1269 else
1270 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1271 if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family,
1272 bytelen + 2))
1273 return false;
1274 } else {
1275 if (!(nexthop->rparent
1276 && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) {
1277 if (gw_family == AF_INET) {
1278 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1279 &nexthop->gate.ipv4, bytelen))
1280 return false;
1281 } else {
1282 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1283 &nexthop->gate.ipv6, bytelen))
1284 return false;
1285 }
1286 }
1287 }
1288
1289 return true;
1290 }
1291
1292 static int build_label_stack(struct mpls_label_stack *nh_label,
1293 mpls_lse_t *out_lse, char *label_buf,
1294 size_t label_buf_size)
1295 {
1296 char label_buf1[20];
1297 int num_labels = 0;
1298
1299 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1300 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1301 continue;
1302
1303 if (IS_ZEBRA_DEBUG_KERNEL) {
1304 if (!num_labels)
1305 snprintf(label_buf, label_buf_size, "label %u",
1306 nh_label->label[i]);
1307 else {
1308 snprintf(label_buf1, sizeof(label_buf1), "/%u",
1309 nh_label->label[i]);
1310 strlcat(label_buf, label_buf1, label_buf_size);
1311 }
1312 }
1313
1314 out_lse[num_labels] =
1315 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1316 num_labels++;
1317 }
1318
1319 return num_labels;
1320 }
1321
1322 static bool _netlink_route_encode_label_info(struct mpls_label_stack *nh_label,
1323 struct nlmsghdr *nlmsg,
1324 size_t buflen, struct rtmsg *rtmsg,
1325 char *label_buf,
1326 size_t label_buf_size)
1327 {
1328 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1329 int num_labels;
1330
1331 /*
1332 * label_buf is *only* currently used within debugging.
1333 * As such when we assign it we are guarding it inside
1334 * a debug test. If you want to change this make sure
1335 * you fix this assumption
1336 */
1337 label_buf[0] = '\0';
1338
1339 num_labels =
1340 build_label_stack(nh_label, out_lse, label_buf, label_buf_size);
1341
1342 if (num_labels) {
1343 /* Set the BoS bit */
1344 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1345
1346 if (rtmsg->rtm_family == AF_MPLS) {
1347 if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse,
1348 num_labels * sizeof(mpls_lse_t)))
1349 return false;
1350 } else {
1351 struct rtattr *nest;
1352
1353 if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
1354 LWTUNNEL_ENCAP_MPLS))
1355 return false;
1356
1357 nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
1358 if (!nest)
1359 return false;
1360
1361 if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST,
1362 &out_lse,
1363 num_labels * sizeof(mpls_lse_t)))
1364 return false;
1365 nl_attr_nest_end(nlmsg, nest);
1366 }
1367 }
1368
1369 return true;
1370 }
1371
1372 static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop,
1373 int family,
1374 struct nlmsghdr *nlmsg,
1375 size_t buflen, int bytelen)
1376 {
1377 if (family == AF_INET) {
1378 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1379 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1380 &nexthop->rmap_src.ipv4, bytelen))
1381 return false;
1382 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1383 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1384 &nexthop->src.ipv4, bytelen))
1385 return false;
1386 }
1387 } else if (family == AF_INET6) {
1388 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1389 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1390 &nexthop->rmap_src.ipv6, bytelen))
1391 return false;
1392 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1393 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1394 &nexthop->src.ipv6, bytelen))
1395 return false;
1396 }
1397 }
1398
1399 return true;
1400 }
1401
/*
 * Build a seg6 iptunnel encapsulation (encap mode) carrying a single SID.
 *
 * Caution: Support only SINGLE-SID, not MULTI-SID.
 * The boundary check below is sized for exactly one segment. Supporting
 * a SID list with several segments requires a variable boundary check
 * on buflen.
 *
 * @param buffer: output buffer; all buflen bytes are zeroed before the
 *                encapsulation is written.
 * @param buflen: size of buffer.
 * @param seg: the SID copied into segments[0].
 *
 * Returns -1 when buffer is too small, otherwise the number of bytes of
 * buffer that make up the encapsulation.
 */
static ssize_t fill_seg6ipt_encap(char *buffer, size_t buflen,
				  const struct in6_addr *seg)
{
	enum {
		SRH_LEN = 24,	      /* SRH length used for hdrlen/return */
		ENCAP_HDR_LEN = 4,    /* added to SRH_LEN in the return value */
		SRH_ROUTING_TYPE = 4, /* value stored in srh->type */
	};
	const size_t needed = sizeof(struct seg6_iptunnel_encap)
			      + sizeof(struct ipv6_sr_hdr) + 16;
	struct seg6_iptunnel_encap *encap;
	struct ipv6_sr_hdr *srh;

	if (buflen < needed)
		return -1;

	memset(buffer, 0, buflen);

	encap = (struct seg6_iptunnel_encap *)buffer;
	encap->mode = SEG6_IPTUN_MODE_ENCAP;

	srh = encap->srh;
	/* hdrlen is stored in 8-byte units, not counting the first 8 bytes */
	srh->hdrlen = (SRH_LEN >> 3) - 1;
	srh->type = SRH_ROUTING_TYPE;
	srh->segments_left = 0;
	srh->first_segment = 0;
	memcpy(&srh->segments[0], seg, sizeof(struct in6_addr));

	return SRH_LEN + ENCAP_HDR_LEN;
}
1435
1436 /* This function takes a nexthop as argument and adds
1437 * the appropriate netlink attributes to an existing
1438 * netlink message.
1439 *
1440 * @param routedesc: Human readable description of route type
1441 * (direct/recursive, single-/multipath)
1442 * @param bytelen: Length of addresses in bytes.
1443 * @param nexthop: Nexthop information
1444 * @param nlmsg: nlmsghdr structure to fill in.
1445 * @param req_size: The size allocated for the message.
1446 *
1447 * The function returns true if the nexthop could be added
1448 * to the message, otherwise false is returned.
1449 */
1450 static bool _netlink_route_build_singlepath(const struct prefix *p,
1451 const char *routedesc, int bytelen,
1452 const struct nexthop *nexthop,
1453 struct nlmsghdr *nlmsg,
1454 struct rtmsg *rtmsg,
1455 size_t req_size, int cmd)
1456 {
1457
1458 char label_buf[256];
1459 struct vrf *vrf;
1460 char addrstr[INET6_ADDRSTRLEN];
1461
1462 assert(nexthop);
1463
1464 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1465
1466 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1467 req_size, rtmsg, label_buf,
1468 sizeof(label_buf)))
1469 return false;
1470
1471 if (nexthop->nh_srv6) {
1472 if (nexthop->nh_srv6->seg6local_action !=
1473 ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
1474 struct rtattr *nest;
1475 const struct seg6local_context *ctx;
1476
1477 ctx = &nexthop->nh_srv6->seg6local_ctx;
1478 if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1479 LWTUNNEL_ENCAP_SEG6_LOCAL))
1480 return false;
1481
1482 nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1483 if (!nest)
1484 return false;
1485
1486 switch (nexthop->nh_srv6->seg6local_action) {
1487 case ZEBRA_SEG6_LOCAL_ACTION_END:
1488 if (!nl_attr_put32(nlmsg, req_size,
1489 SEG6_LOCAL_ACTION,
1490 SEG6_LOCAL_ACTION_END))
1491 return false;
1492 break;
1493 case ZEBRA_SEG6_LOCAL_ACTION_END_X:
1494 if (!nl_attr_put32(nlmsg, req_size,
1495 SEG6_LOCAL_ACTION,
1496 SEG6_LOCAL_ACTION_END_X))
1497 return false;
1498 if (!nl_attr_put(nlmsg, req_size,
1499 SEG6_LOCAL_NH6, &ctx->nh6,
1500 sizeof(struct in6_addr)))
1501 return false;
1502 break;
1503 case ZEBRA_SEG6_LOCAL_ACTION_END_T:
1504 if (!nl_attr_put32(nlmsg, req_size,
1505 SEG6_LOCAL_ACTION,
1506 SEG6_LOCAL_ACTION_END_T))
1507 return false;
1508 if (!nl_attr_put32(nlmsg, req_size,
1509 SEG6_LOCAL_TABLE,
1510 ctx->table))
1511 return false;
1512 break;
1513 case ZEBRA_SEG6_LOCAL_ACTION_END_DX4:
1514 if (!nl_attr_put32(nlmsg, req_size,
1515 SEG6_LOCAL_ACTION,
1516 SEG6_LOCAL_ACTION_END_DX4))
1517 return false;
1518 if (!nl_attr_put(nlmsg, req_size,
1519 SEG6_LOCAL_NH4, &ctx->nh4,
1520 sizeof(struct in_addr)))
1521 return false;
1522 break;
1523 case ZEBRA_SEG6_LOCAL_ACTION_END_DT6:
1524 if (!nl_attr_put32(nlmsg, req_size,
1525 SEG6_LOCAL_ACTION,
1526 SEG6_LOCAL_ACTION_END_DT6))
1527 return false;
1528 if (!nl_attr_put32(nlmsg, req_size,
1529 SEG6_LOCAL_TABLE,
1530 ctx->table))
1531 return false;
1532 break;
1533 case ZEBRA_SEG6_LOCAL_ACTION_END_DT4:
1534 if (!nl_attr_put32(nlmsg, req_size,
1535 SEG6_LOCAL_ACTION,
1536 SEG6_LOCAL_ACTION_END_DT4))
1537 return false;
1538 if (!nl_attr_put32(nlmsg, req_size,
1539 SEG6_LOCAL_VRFTABLE,
1540 ctx->table))
1541 return false;
1542 break;
1543 case ZEBRA_SEG6_LOCAL_ACTION_END_DT46:
1544 if (!nl_attr_put32(nlmsg, req_size,
1545 SEG6_LOCAL_ACTION,
1546 SEG6_LOCAL_ACTION_END_DT46))
1547 return false;
1548 if (!nl_attr_put32(nlmsg, req_size,
1549 SEG6_LOCAL_VRFTABLE,
1550 ctx->table))
1551 return false;
1552 break;
1553 default:
1554 zlog_err("%s: unsupport seg6local behaviour action=%u",
1555 __func__,
1556 nexthop->nh_srv6->seg6local_action);
1557 return false;
1558 }
1559 nl_attr_nest_end(nlmsg, nest);
1560 }
1561
1562 if (!sid_zero(&nexthop->nh_srv6->seg6_segs)) {
1563 char tun_buf[4096];
1564 ssize_t tun_len;
1565 struct rtattr *nest;
1566
1567 if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1568 LWTUNNEL_ENCAP_SEG6))
1569 return false;
1570 nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1571 if (!nest)
1572 return false;
1573 tun_len = fill_seg6ipt_encap(tun_buf, sizeof(tun_buf),
1574 &nexthop->nh_srv6->seg6_segs);
1575 if (tun_len < 0)
1576 return false;
1577 if (!nl_attr_put(nlmsg, req_size, SEG6_IPTUNNEL_SRH,
1578 tun_buf, tun_len))
1579 return false;
1580 nl_attr_nest_end(nlmsg, nest);
1581 }
1582 }
1583
1584 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1585 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1586
1587 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1588 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1589 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1590 return false;
1591 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1592 return false;
1593
1594 if (cmd == RTM_NEWROUTE) {
1595 if (!_netlink_route_encode_nexthop_src(
1596 nexthop, AF_INET, nlmsg, req_size, bytelen))
1597 return false;
1598 }
1599
1600 if (IS_ZEBRA_DEBUG_KERNEL)
1601 zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1602 __func__, routedesc, p, ipv4_ll_buf,
1603 label_buf, nexthop->ifindex,
1604 VRF_LOGNAME(vrf), nexthop->vrf_id);
1605 return true;
1606 }
1607
1608 if (nexthop->type == NEXTHOP_TYPE_IPV4
1609 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1610 /* Send deletes to the kernel without specifying the next-hop */
1611 if (cmd != RTM_DELROUTE) {
1612 if (!_netlink_route_add_gateway_info(
1613 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1614 bytelen, nexthop))
1615 return false;
1616 }
1617
1618 if (cmd == RTM_NEWROUTE) {
1619 if (!_netlink_route_encode_nexthop_src(
1620 nexthop, AF_INET, nlmsg, req_size, bytelen))
1621 return false;
1622 }
1623
1624 if (IS_ZEBRA_DEBUG_KERNEL) {
1625 inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr,
1626 sizeof(addrstr));
1627 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1628 __func__, routedesc, p, addrstr, label_buf,
1629 nexthop->ifindex, VRF_LOGNAME(vrf),
1630 nexthop->vrf_id);
1631 }
1632 }
1633
1634 if (nexthop->type == NEXTHOP_TYPE_IPV6
1635 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1636 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1637 AF_INET6, nlmsg, req_size,
1638 bytelen, nexthop))
1639 return false;
1640
1641 if (cmd == RTM_NEWROUTE) {
1642 if (!_netlink_route_encode_nexthop_src(
1643 nexthop, AF_INET6, nlmsg, req_size,
1644 bytelen))
1645 return false;
1646 }
1647
1648 if (IS_ZEBRA_DEBUG_KERNEL) {
1649 inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr,
1650 sizeof(addrstr));
1651 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1652 __func__, routedesc, p, addrstr, label_buf,
1653 nexthop->ifindex, VRF_LOGNAME(vrf),
1654 nexthop->vrf_id);
1655 }
1656 }
1657
1658 /*
1659 * We have the ifindex so we should always send it
1660 * This is especially useful if we are doing route
1661 * leaking.
1662 */
1663 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
1664 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1665 return false;
1666 }
1667
1668 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1669 if (cmd == RTM_NEWROUTE) {
1670 if (!_netlink_route_encode_nexthop_src(
1671 nexthop, AF_INET, nlmsg, req_size, bytelen))
1672 return false;
1673 }
1674
1675 if (IS_ZEBRA_DEBUG_KERNEL)
1676 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1677 __func__, routedesc, p, nexthop->ifindex,
1678 VRF_LOGNAME(vrf), nexthop->vrf_id);
1679 }
1680
1681 return true;
1682 }
1683
1684 /* This function appends tag value as rtnl flow attribute
1685 * to the given netlink msg only if value is less than 256.
1686 * Used only if SUPPORT_REALMS enabled.
1687 *
1688 * @param nlmsg: nlmsghdr structure to fill in.
1689 * @param maxlen: The size allocated for the message.
1690 * @param tag: The route tag.
1691 *
1692 * The function returns true if the flow attribute could
1693 * be added to the message, otherwise false is returned.
1694 */
1695 static inline bool _netlink_set_tag(struct nlmsghdr *n, unsigned int maxlen,
1696 route_tag_t tag)
1697 {
1698 if (tag > 0 && tag <= 255) {
1699 if (!nl_attr_put32(n, maxlen, RTA_FLOW, tag))
1700 return false;
1701 }
1702 return true;
1703 }
1704
1705 /* This function takes a nexthop as argument and
1706 * appends to the given netlink msg. If the nexthop
1707 * defines a preferred source, the src parameter
1708 * will be modified to point to that src, otherwise
1709 * it will be kept unmodified.
1710 *
1711 * @param routedesc: Human readable description of route type
1712 * (direct/recursive, single-/multipath)
1713 * @param bytelen: Length of addresses in bytes.
1714 * @param nexthop: Nexthop information
1715 * @param nlmsg: nlmsghdr structure to fill in.
1716 * @param req_size: The size allocated for the message.
1717 * @param src: pointer pointing to a location where
1718 * the prefsrc should be stored.
1719 *
1720 * The function returns true if the nexthop could be added
1721 * to the message, otherwise false is returned.
1722 */
1723 static bool _netlink_route_build_multipath(
1724 const struct prefix *p, const char *routedesc, int bytelen,
1725 const struct nexthop *nexthop, struct nlmsghdr *nlmsg, size_t req_size,
1726 struct rtmsg *rtmsg, const union g_addr **src, route_tag_t tag)
1727 {
1728 char label_buf[256];
1729 struct vrf *vrf;
1730 struct rtnexthop *rtnh;
1731
1732 rtnh = nl_attr_rtnh(nlmsg, req_size);
1733 if (rtnh == NULL)
1734 return false;
1735
1736 assert(nexthop);
1737
1738 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1739
1740 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1741 req_size, rtmsg, label_buf,
1742 sizeof(label_buf)))
1743 return false;
1744
1745 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1746 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1747
1748 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1749 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1750 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1751 return false;
1752 rtnh->rtnh_ifindex = nexthop->ifindex;
1753 if (nexthop->weight)
1754 rtnh->rtnh_hops = nexthop->weight - 1;
1755
1756 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1757 *src = &nexthop->rmap_src;
1758 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1759 *src = &nexthop->src;
1760
1761 if (IS_ZEBRA_DEBUG_KERNEL)
1762 zlog_debug(
1763 "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1764 __func__, routedesc, p, ipv4_ll_buf, label_buf,
1765 nexthop->ifindex, VRF_LOGNAME(vrf),
1766 nexthop->vrf_id);
1767 nl_attr_rtnh_end(nlmsg, rtnh);
1768 return true;
1769 }
1770
1771 if (nexthop->type == NEXTHOP_TYPE_IPV4
1772 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1773 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET,
1774 nlmsg, req_size, bytelen,
1775 nexthop))
1776 return false;
1777
1778 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1779 *src = &nexthop->rmap_src;
1780 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1781 *src = &nexthop->src;
1782
1783 if (IS_ZEBRA_DEBUG_KERNEL)
1784 zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)",
1785 __func__, routedesc, p, &nexthop->gate.ipv4,
1786 label_buf, nexthop->ifindex,
1787 VRF_LOGNAME(vrf), nexthop->vrf_id);
1788 }
1789 if (nexthop->type == NEXTHOP_TYPE_IPV6
1790 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1791 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1792 AF_INET6, nlmsg, req_size,
1793 bytelen, nexthop))
1794 return false;
1795
1796 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1797 *src = &nexthop->rmap_src;
1798 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1799 *src = &nexthop->src;
1800
1801 if (IS_ZEBRA_DEBUG_KERNEL)
1802 zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)",
1803 __func__, routedesc, p, &nexthop->gate.ipv6,
1804 label_buf, nexthop->ifindex,
1805 VRF_LOGNAME(vrf), nexthop->vrf_id);
1806 }
1807
1808 /*
1809 * We have figured out the ifindex so we should always send it
1810 * This is especially useful if we are doing route
1811 * leaking.
1812 */
1813 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1814 rtnh->rtnh_ifindex = nexthop->ifindex;
1815
1816 /* ifindex */
1817 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1818 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1819 *src = &nexthop->rmap_src;
1820 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1821 *src = &nexthop->src;
1822
1823 if (IS_ZEBRA_DEBUG_KERNEL)
1824 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1825 __func__, routedesc, p, nexthop->ifindex,
1826 VRF_LOGNAME(vrf), nexthop->vrf_id);
1827 }
1828
1829 if (nexthop->weight)
1830 rtnh->rtnh_hops = nexthop->weight - 1;
1831
1832 if (!_netlink_set_tag(nlmsg, req_size, tag))
1833 return false;
1834
1835 nl_attr_rtnh_end(nlmsg, rtnh);
1836 return true;
1837 }
1838
1839 static inline bool
1840 _netlink_mpls_build_singlepath(const struct prefix *p, const char *routedesc,
1841 const struct zebra_nhlfe *nhlfe,
1842 struct nlmsghdr *nlmsg, struct rtmsg *rtmsg,
1843 size_t req_size, int cmd)
1844 {
1845 int bytelen;
1846 uint8_t family;
1847
1848 family = NHLFE_FAMILY(nhlfe);
1849 bytelen = (family == AF_INET ? 4 : 16);
1850 return _netlink_route_build_singlepath(p, routedesc, bytelen,
1851 nhlfe->nexthop, nlmsg, rtmsg,
1852 req_size, cmd);
1853 }
1854
1855
1856 static inline bool
1857 _netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc,
1858 const struct zebra_nhlfe *nhlfe,
1859 struct nlmsghdr *nlmsg, size_t req_size,
1860 struct rtmsg *rtmsg, const union g_addr **src)
1861 {
1862 int bytelen;
1863 uint8_t family;
1864
1865 family = NHLFE_FAMILY(nhlfe);
1866 bytelen = (family == AF_INET ? 4 : 16);
1867 return _netlink_route_build_multipath(p, routedesc, bytelen,
1868 nhlfe->nexthop, nlmsg, req_size,
1869 rtmsg, src, 0);
1870 }
1871
1872 static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
1873 {
1874 if (IS_ZEBRA_DEBUG_KERNEL)
1875 zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20",
1876 routedesc, nl_msg_type_to_str(cmd), label);
1877 }
1878
1879 static int netlink_neigh_update(int cmd, int ifindex, void *addr, char *lla,
1880 int llalen, ns_id_t ns_id, uint8_t family,
1881 bool permanent, uint8_t protocol)
1882 {
1883 struct {
1884 struct nlmsghdr n;
1885 struct ndmsg ndm;
1886 char buf[256];
1887 } req;
1888
1889 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
1890
1891 memset(&req, 0, sizeof(req));
1892
1893 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1894 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1895 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1896 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1897
1898 req.ndm.ndm_family = family;
1899 req.ndm.ndm_ifindex = ifindex;
1900 req.ndm.ndm_type = RTN_UNICAST;
1901 if (cmd == RTM_NEWNEIGH) {
1902 if (!permanent)
1903 req.ndm.ndm_state = NUD_REACHABLE;
1904 else
1905 req.ndm.ndm_state = NUD_PERMANENT;
1906 } else
1907 req.ndm.ndm_state = NUD_FAILED;
1908
1909 nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol,
1910 sizeof(protocol));
1911 req.ndm.ndm_type = RTN_UNICAST;
1912 nl_attr_put(&req.n, sizeof(req), NDA_DST, addr,
1913 family2addrsize(family));
1914 if (lla)
1915 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1916
1917 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1918 false);
1919 }
1920
1921 static bool nexthop_set_src(const struct nexthop *nexthop, int family,
1922 union g_addr *src)
1923 {
1924 if (family == AF_INET) {
1925 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1926 src->ipv4 = nexthop->rmap_src.ipv4;
1927 return true;
1928 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1929 src->ipv4 = nexthop->src.ipv4;
1930 return true;
1931 }
1932 } else if (family == AF_INET6) {
1933 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1934 src->ipv6 = nexthop->rmap_src.ipv6;
1935 return true;
1936 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1937 src->ipv6 = nexthop->src.ipv6;
1938 return true;
1939 }
1940 }
1941
1942 return false;
1943 }
1944
1945 /*
1946 * The function returns true if the attribute could be added
1947 * to the message, otherwise false is returned.
1948 */
1949 static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen,
1950 struct nexthop *nh)
1951 {
1952 struct rtattr *nest;
1953
1954 switch (nh->nh_encap_type) {
1955 case NET_VXLAN:
1956 if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type))
1957 return false;
1958
1959 nest = nl_attr_nest(n, nlen, RTA_ENCAP);
1960 if (!nest)
1961 return false;
1962
1963 if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */,
1964 nh->nh_encap.vni))
1965 return false;
1966 nl_attr_nest_end(n, nest);
1967 break;
1968 }
1969
1970 return true;
1971 }
1972
1973 /*
1974 * Routing table change via netlink interface, using a dataplane context object
1975 *
1976 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
1977 * otherwise the number of bytes written to buf.
1978 */
1979 ssize_t netlink_route_multipath_msg_encode(int cmd,
1980 struct zebra_dplane_ctx *ctx,
1981 uint8_t *data, size_t datalen,
1982 bool fpm, bool force_nhg)
1983 {
1984 int bytelen;
1985 struct nexthop *nexthop = NULL;
1986 unsigned int nexthop_num;
1987 const char *routedesc;
1988 bool setsrc = false;
1989 union g_addr src;
1990 const struct prefix *p, *src_p;
1991 uint32_t table_id;
1992 struct nlsock *nl;
1993 route_tag_t tag = 0;
1994
1995 struct {
1996 struct nlmsghdr n;
1997 struct rtmsg r;
1998 char buf[];
1999 } *req = (void *)data;
2000
2001 p = dplane_ctx_get_dest(ctx);
2002 src_p = dplane_ctx_get_src(ctx);
2003
2004 if (datalen < sizeof(*req))
2005 return 0;
2006
2007 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
2008
2009 memset(req, 0, sizeof(*req));
2010
2011 bytelen = (p->family == AF_INET ? 4 : 16);
2012
2013 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2014 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2015
2016 if ((cmd == RTM_NEWROUTE) &&
2017 ((p->family == AF_INET) || v6_rr_semantics))
2018 req->n.nlmsg_flags |= NLM_F_REPLACE;
2019
2020 req->n.nlmsg_type = cmd;
2021
2022 req->n.nlmsg_pid = nl->snl.nl_pid;
2023
2024 req->r.rtm_family = p->family;
2025 req->r.rtm_dst_len = p->prefixlen;
2026 req->r.rtm_src_len = src_p ? src_p->prefixlen : 0;
2027 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
2028
2029 if (cmd == RTM_DELROUTE)
2030 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
2031 else
2032 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
2033
2034 /*
2035 * blackhole routes are not RTN_UNICAST, they are
2036 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
2037 * so setting this value as a RTN_UNICAST would
2038 * cause the route lookup of just the prefix
2039 * to fail. So no need to specify this for
2040 * the RTM_DELROUTE case
2041 */
2042 if (cmd != RTM_DELROUTE)
2043 req->r.rtm_type = RTN_UNICAST;
2044
2045 if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen))
2046 return 0;
2047 if (src_p) {
2048 if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix,
2049 bytelen))
2050 return 0;
2051 }
2052
2053 /* Metric. */
2054 /* Hardcode the metric for all routes coming from zebra. Metric isn't
2055 * used
2056 * either by the kernel or by zebra. Its purely for calculating best
2057 * path(s)
2058 * by the routing protocol and for communicating with protocol peers.
2059 */
2060 if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY,
2061 NL_DEFAULT_ROUTE_METRIC))
2062 return 0;
2063
2064 #if defined(SUPPORT_REALMS)
2065 if (cmd == RTM_DELROUTE)
2066 tag = dplane_ctx_get_old_tag(ctx);
2067 else
2068 tag = dplane_ctx_get_tag(ctx);
2069 #endif
2070
2071 /* Table corresponding to this route. */
2072 table_id = dplane_ctx_get_table(ctx);
2073 if (table_id < 256)
2074 req->r.rtm_table = table_id;
2075 else {
2076 req->r.rtm_table = RT_TABLE_UNSPEC;
2077 if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id))
2078 return 0;
2079 }
2080
2081 if (IS_ZEBRA_DEBUG_KERNEL)
2082 zlog_debug(
2083 "%s: %s %pFX vrf %u(%u)", __func__,
2084 nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx),
2085 table_id);
2086
2087 /*
2088 * If we are not updating the route and we have received
2089 * a route delete, then all we need to fill in is the
2090 * prefix information to tell the kernel to schwack
2091 * it.
2092 */
2093 if (cmd == RTM_DELROUTE) {
2094 if (!_netlink_set_tag(&req->n, datalen, tag))
2095 return 0;
2096 return NLMSG_ALIGN(req->n.nlmsg_len);
2097 }
2098
2099 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
2100 struct rtattr *nest;
2101 uint32_t mtu = dplane_ctx_get_mtu(ctx);
2102 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
2103
2104 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
2105 mtu = nexthop_mtu;
2106
2107 nest = nl_attr_nest(&req->n, datalen, RTA_METRICS);
2108 if (nest == NULL)
2109 return 0;
2110
2111 if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu)))
2112 return 0;
2113 nl_attr_nest_end(&req->n, nest);
2114 }
2115
2116 /*
2117 * Always install blackhole routes without using nexthops, because of
2118 * the following kernel problems:
2119 * 1. Kernel nexthops don't suport unreachable/prohibit route types.
2120 * 2. Blackhole kernel nexthops are deleted when loopback is down.
2121 */
2122 nexthop = dplane_ctx_get_ng(ctx)->nexthop;
2123 if (nexthop) {
2124 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2125 nexthop = nexthop->resolved;
2126
2127 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
2128 switch (nexthop->bh_type) {
2129 case BLACKHOLE_ADMINPROHIB:
2130 req->r.rtm_type = RTN_PROHIBIT;
2131 break;
2132 case BLACKHOLE_REJECT:
2133 req->r.rtm_type = RTN_UNREACHABLE;
2134 break;
2135 default:
2136 req->r.rtm_type = RTN_BLACKHOLE;
2137 break;
2138 }
2139 return NLMSG_ALIGN(req->n.nlmsg_len);
2140 }
2141 }
2142
2143 if ((!fpm && kernel_nexthops_supported()
2144 && (!proto_nexthops_only()
2145 || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0)))
2146 || (fpm && force_nhg)) {
2147 /* Kernel supports nexthop objects */
2148 if (IS_ZEBRA_DEBUG_KERNEL)
2149 zlog_debug("%s: %pFX nhg_id is %u", __func__, p,
2150 dplane_ctx_get_nhe_id(ctx));
2151
2152 if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID,
2153 dplane_ctx_get_nhe_id(ctx)))
2154 return 0;
2155
2156 /* Have to determine src still */
2157 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2158 if (setsrc)
2159 break;
2160
2161 setsrc = nexthop_set_src(nexthop, p->family, &src);
2162 }
2163
2164 if (setsrc) {
2165 if (p->family == AF_INET) {
2166 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2167 &src.ipv4, bytelen))
2168 return 0;
2169 } else if (p->family == AF_INET6) {
2170 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2171 &src.ipv6, bytelen))
2172 return 0;
2173 }
2174 }
2175
2176 return NLMSG_ALIGN(req->n.nlmsg_len);
2177 }
2178
2179 /* Count overall nexthops so we can decide whether to use singlepath
2180 * or multipath case.
2181 */
2182 nexthop_num = 0;
2183 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2184 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2185 continue;
2186 if (!NEXTHOP_IS_ACTIVE(nexthop->flags))
2187 continue;
2188
2189 nexthop_num++;
2190 }
2191
2192 /* Singlepath case. */
2193 if (nexthop_num == 1) {
2194 nexthop_num = 0;
2195 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2196 if (CHECK_FLAG(nexthop->flags,
2197 NEXTHOP_FLAG_RECURSIVE)) {
2198
2199 if (setsrc)
2200 continue;
2201
2202 setsrc = nexthop_set_src(nexthop, p->family,
2203 &src);
2204 continue;
2205 }
2206
2207 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
2208 routedesc = nexthop->rparent
2209 ? "recursive, single-path"
2210 : "single-path";
2211
2212 if (!_netlink_set_tag(&req->n, datalen, tag))
2213 return 0;
2214
2215 if (!_netlink_route_build_singlepath(
2216 p, routedesc, bytelen, nexthop,
2217 &req->n, &req->r, datalen, cmd))
2218 return 0;
2219 nexthop_num++;
2220 break;
2221 }
2222
2223 /*
2224 * Add encapsulation information when installing via
2225 * FPM.
2226 */
2227 if (fpm) {
2228 if (!netlink_route_nexthop_encap(
2229 &req->n, datalen, nexthop))
2230 return 0;
2231 }
2232 }
2233
2234 if (setsrc) {
2235 if (p->family == AF_INET) {
2236 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2237 &src.ipv4, bytelen))
2238 return 0;
2239 } else if (p->family == AF_INET6) {
2240 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2241 &src.ipv6, bytelen))
2242 return 0;
2243 }
2244 }
2245 } else { /* Multipath case */
2246 struct rtattr *nest;
2247 const union g_addr *src1 = NULL;
2248
2249 nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH);
2250 if (nest == NULL)
2251 return 0;
2252
2253 nexthop_num = 0;
2254 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2255 if (CHECK_FLAG(nexthop->flags,
2256 NEXTHOP_FLAG_RECURSIVE)) {
2257 /* This only works for IPv4 now */
2258 if (setsrc)
2259 continue;
2260
2261 setsrc = nexthop_set_src(nexthop, p->family,
2262 &src);
2263 continue;
2264 }
2265
2266 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
2267 routedesc = nexthop->rparent
2268 ? "recursive, multipath"
2269 : "multipath";
2270 nexthop_num++;
2271
2272 if (!_netlink_route_build_multipath(
2273 p, routedesc, bytelen, nexthop,
2274 &req->n, datalen, &req->r, &src1,
2275 tag))
2276 return 0;
2277
2278 if (!setsrc && src1) {
2279 if (p->family == AF_INET)
2280 src.ipv4 = src1->ipv4;
2281 else if (p->family == AF_INET6)
2282 src.ipv6 = src1->ipv6;
2283
2284 setsrc = 1;
2285 }
2286 }
2287 }
2288
2289 nl_attr_nest_end(&req->n, nest);
2290
2291 /*
2292 * Add encapsulation information when installing via
2293 * FPM.
2294 */
2295 if (fpm) {
2296 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
2297 nexthop)) {
2298 if (CHECK_FLAG(nexthop->flags,
2299 NEXTHOP_FLAG_RECURSIVE))
2300 continue;
2301 if (!netlink_route_nexthop_encap(
2302 &req->n, datalen, nexthop))
2303 return 0;
2304 }
2305 }
2306
2307
2308 if (setsrc) {
2309 if (p->family == AF_INET) {
2310 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2311 &src.ipv4, bytelen))
2312 return 0;
2313 } else if (p->family == AF_INET6) {
2314 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2315 &src.ipv6, bytelen))
2316 return 0;
2317 }
2318 if (IS_ZEBRA_DEBUG_KERNEL)
2319 zlog_debug("Setting source");
2320 }
2321 }
2322
2323 /* If there is no useful nexthop then return. */
2324 if (nexthop_num == 0) {
2325 if (IS_ZEBRA_DEBUG_KERNEL)
2326 zlog_debug("%s: No useful nexthop.", __func__);
2327 }
2328
2329 return NLMSG_ALIGN(req->n.nlmsg_len);
2330 }
2331
2332 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
2333 {
2334 uint32_t actual_table;
2335 int suc = 0;
2336 struct mcast_route_data *mr = (struct mcast_route_data *)in;
2337 struct {
2338 struct nlmsghdr n;
2339 struct rtmsg rtm;
2340 char buf[256];
2341 } req;
2342
2343 mroute = mr;
2344 struct zebra_ns *zns;
2345
2346 zns = zvrf->zns;
2347 memset(&req, 0, sizeof(req));
2348
2349 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2350 req.n.nlmsg_flags = NLM_F_REQUEST;
2351 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2352
2353 req.n.nlmsg_type = RTM_GETROUTE;
2354
2355 if (mroute->family == AF_INET) {
2356 req.rtm.rtm_family = RTNL_FAMILY_IPMR;
2357 req.rtm.rtm_dst_len = IPV4_MAX_BITLEN;
2358 req.rtm.rtm_src_len = IPV4_MAX_BITLEN;
2359
2360 nl_attr_put(&req.n, sizeof(req), RTA_SRC,
2361 &mroute->src.ipaddr_v4,
2362 sizeof(mroute->src.ipaddr_v4));
2363 nl_attr_put(&req.n, sizeof(req), RTA_DST,
2364 &mroute->grp.ipaddr_v4,
2365 sizeof(mroute->grp.ipaddr_v4));
2366 } else {
2367 req.rtm.rtm_family = RTNL_FAMILY_IP6MR;
2368 req.rtm.rtm_dst_len = IPV6_MAX_BITLEN;
2369 req.rtm.rtm_src_len = IPV6_MAX_BITLEN;
2370
2371 nl_attr_put(&req.n, sizeof(req), RTA_SRC,
2372 &mroute->src.ipaddr_v6,
2373 sizeof(mroute->src.ipaddr_v6));
2374 nl_attr_put(&req.n, sizeof(req), RTA_DST,
2375 &mroute->grp.ipaddr_v6,
2376 sizeof(mroute->grp.ipaddr_v6));
2377 }
2378
2379 /*
2380 * What?
2381 *
2382 * So during the namespace cleanup we started storing
2383 * the zvrf table_id for the default table as RT_TABLE_MAIN
2384 * which is what the normal routing table for ip routing is.
2385 * This change caused this to break our lookups of sg data
2386 * because prior to this change the zvrf->table_id was 0
2387 * and when the pim multicast kernel code saw a 0,
2388 * it was auto-translated to RT_TABLE_DEFAULT. But since
2389 * we are now passing in RT_TABLE_MAIN there is no auto-translation
2390 * and the kernel goes screw you and the delicious cookies you
2391 * are trying to give me. So now we have this little hack.
2392 */
2393 if (mroute->family == AF_INET)
2394 actual_table = (zvrf->table_id == RT_TABLE_MAIN)
2395 ? RT_TABLE_DEFAULT
2396 : zvrf->table_id;
2397 else
2398 actual_table = zvrf->table_id;
2399
2400 nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table);
2401
2402 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
2403 &zns->netlink_cmd, zns, false);
2404
2405 mroute = NULL;
2406 return suc;
2407 }
2408
2409 /* Char length to debug ID with */
2410 #define ID_LENGTH 10
2411
2412 static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
2413 uint32_t id,
2414 const struct nh_grp *z_grp,
2415 const uint8_t count)
2416 {
2417 struct nexthop_grp grp[count];
2418 /* Need space for max group size, "/", and null term */
2419 char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
2420 char buf1[ID_LENGTH + 2];
2421
2422 buf[0] = '\0';
2423
2424 memset(grp, 0, sizeof(grp));
2425
2426 if (count) {
2427 for (int i = 0; i < count; i++) {
2428 grp[i].id = z_grp[i].id;
2429 grp[i].weight = z_grp[i].weight - 1;
2430
2431 if (IS_ZEBRA_DEBUG_KERNEL) {
2432 if (i == 0)
2433 snprintf(buf, sizeof(buf1), "group %u",
2434 grp[i].id);
2435 else {
2436 snprintf(buf1, sizeof(buf1), "/%u",
2437 grp[i].id);
2438 strlcat(buf, buf1, sizeof(buf));
2439 }
2440 }
2441 }
2442 if (!nl_attr_put(n, req_size, NHA_GROUP, grp,
2443 count * sizeof(*grp)))
2444 return false;
2445 }
2446
2447 if (IS_ZEBRA_DEBUG_KERNEL)
2448 zlog_debug("%s: ID (%u): %s", __func__, id, buf);
2449
2450 return true;
2451 }
2452
2453 /**
2454 * Next hop packet encoding helper function.
2455 *
2456 * \param[in] cmd netlink command.
2457 * \param[in] ctx dataplane context (information snapshot).
2458 * \param[out] buf buffer to hold the packet.
2459 * \param[in] buflen amount of buffer bytes.
2460 *
2461 * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
2462 * otherwise the number of bytes written to buf.
2463 */
2464 ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
2465 const struct zebra_dplane_ctx *ctx,
2466 void *buf, size_t buflen)
2467 {
2468 struct {
2469 struct nlmsghdr n;
2470 struct nhmsg nhm;
2471 char buf[];
2472 } *req = buf;
2473
2474 mpls_lse_t out_lse[MPLS_MAX_LABELS];
2475 char label_buf[256];
2476 int num_labels = 0;
2477 uint32_t id = dplane_ctx_get_nhe_id(ctx);
2478 int type = dplane_ctx_get_nhe_type(ctx);
2479 struct rtattr *nest;
2480 uint16_t encap;
2481 struct nlsock *nl =
2482 kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
2483
2484 if (!id) {
2485 flog_err(
2486 EC_ZEBRA_NHG_FIB_UPDATE,
2487 "Failed trying to update a nexthop group in the kernel that does not have an ID");
2488 return -1;
2489 }
2490
2491 /*
2492 * Nothing to do if the kernel doesn't support nexthop objects or
2493 * we dont want to install this type of NHG
2494 */
2495 if (!kernel_nexthops_supported()) {
2496 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2497 zlog_debug(
2498 "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring",
2499 __func__, id, zebra_route_string(type));
2500 return 0;
2501 }
2502
2503 if (proto_nexthops_only() && !is_proto_nhg(id, type)) {
2504 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2505 zlog_debug(
2506 "%s: nhg_id %u (%s): proto-based nexthops only, ignoring",
2507 __func__, id, zebra_route_string(type));
2508 return 0;
2509 }
2510
2511 label_buf[0] = '\0';
2512
2513 if (buflen < sizeof(*req))
2514 return 0;
2515
2516 memset(req, 0, sizeof(*req));
2517
2518 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2519 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2520
2521 if (cmd == RTM_NEWNEXTHOP)
2522 req->n.nlmsg_flags |= NLM_F_REPLACE;
2523
2524 req->n.nlmsg_type = cmd;
2525 req->n.nlmsg_pid = nl->snl.nl_pid;
2526
2527 req->nhm.nh_family = AF_UNSPEC;
2528 /* TODO: Scope? */
2529
2530 if (!nl_attr_put32(&req->n, buflen, NHA_ID, id))
2531 return 0;
2532
2533 if (cmd == RTM_NEWNEXTHOP) {
2534 /*
2535 * We distinguish between a "group", which is a collection
2536 * of ids, and a singleton nexthop with an id. The
2537 * group is installed as an id that just refers to a list of
2538 * other ids.
2539 */
2540 if (dplane_ctx_get_nhe_nh_grp_count(ctx)) {
2541 if (!_netlink_nexthop_build_group(
2542 &req->n, buflen, id,
2543 dplane_ctx_get_nhe_nh_grp(ctx),
2544 dplane_ctx_get_nhe_nh_grp_count(ctx)))
2545 return 0;
2546 } else {
2547 const struct nexthop *nh =
2548 dplane_ctx_get_nhe_ng(ctx)->nexthop;
2549 afi_t afi = dplane_ctx_get_nhe_afi(ctx);
2550
2551 if (afi == AFI_IP)
2552 req->nhm.nh_family = AF_INET;
2553 else if (afi == AFI_IP6)
2554 req->nhm.nh_family = AF_INET6;
2555
2556 switch (nh->type) {
2557 case NEXTHOP_TYPE_IPV4:
2558 case NEXTHOP_TYPE_IPV4_IFINDEX:
2559 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2560 &nh->gate.ipv4,
2561 IPV4_MAX_BYTELEN))
2562 return 0;
2563 break;
2564 case NEXTHOP_TYPE_IPV6:
2565 case NEXTHOP_TYPE_IPV6_IFINDEX:
2566 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2567 &nh->gate.ipv6,
2568 IPV6_MAX_BYTELEN))
2569 return 0;
2570 break;
2571 case NEXTHOP_TYPE_BLACKHOLE:
2572 if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE,
2573 NULL, 0))
2574 return 0;
2575 /* Blackhole shouldn't have anymore attributes
2576 */
2577 goto nexthop_done;
2578 case NEXTHOP_TYPE_IFINDEX:
2579 /* Don't need anymore info for this */
2580 break;
2581 }
2582
2583 if (!nh->ifindex) {
2584 flog_err(
2585 EC_ZEBRA_NHG_FIB_UPDATE,
2586 "Context received for kernel nexthop update without an interface");
2587 return -1;
2588 }
2589
2590 if (!nl_attr_put32(&req->n, buflen, NHA_OIF,
2591 nh->ifindex))
2592 return 0;
2593
2594 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK))
2595 req->nhm.nh_flags |= RTNH_F_ONLINK;
2596
2597 num_labels =
2598 build_label_stack(nh->nh_label, out_lse,
2599 label_buf, sizeof(label_buf));
2600
2601 if (num_labels) {
2602 /* Set the BoS bit */
2603 out_lse[num_labels - 1] |=
2604 htonl(1 << MPLS_LS_S_SHIFT);
2605
2606 /*
2607 * TODO: MPLS unsupported for now in kernel.
2608 */
2609 if (req->nhm.nh_family == AF_MPLS)
2610 goto nexthop_done;
2611
2612 encap = LWTUNNEL_ENCAP_MPLS;
2613 if (!nl_attr_put16(&req->n, buflen,
2614 NHA_ENCAP_TYPE, encap))
2615 return 0;
2616 nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP);
2617 if (!nest)
2618 return 0;
2619 if (!nl_attr_put(
2620 &req->n, buflen, MPLS_IPTUNNEL_DST,
2621 &out_lse,
2622 num_labels * sizeof(mpls_lse_t)))
2623 return 0;
2624
2625 nl_attr_nest_end(&req->n, nest);
2626 }
2627
2628 if (nh->nh_srv6) {
2629 if (nh->nh_srv6->seg6local_action !=
2630 ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
2631 uint32_t action;
2632 uint16_t encap;
2633 struct rtattr *nest;
2634 const struct seg6local_context *ctx;
2635
2636 req->nhm.nh_family = AF_INET6;
2637 action = nh->nh_srv6->seg6local_action;
2638 ctx = &nh->nh_srv6->seg6local_ctx;
2639 encap = LWTUNNEL_ENCAP_SEG6_LOCAL;
2640 if (!nl_attr_put(&req->n, buflen,
2641 NHA_ENCAP_TYPE,
2642 &encap,
2643 sizeof(uint16_t)))
2644 return 0;
2645
2646 nest = nl_attr_nest(&req->n, buflen,
2647 NHA_ENCAP | NLA_F_NESTED);
2648 if (!nest)
2649 return 0;
2650
2651 switch (action) {
2652 case SEG6_LOCAL_ACTION_END:
2653 if (!nl_attr_put32(
2654 &req->n, buflen,
2655 SEG6_LOCAL_ACTION,
2656 SEG6_LOCAL_ACTION_END))
2657 return 0;
2658 break;
2659 case SEG6_LOCAL_ACTION_END_X:
2660 if (!nl_attr_put32(
2661 &req->n, buflen,
2662 SEG6_LOCAL_ACTION,
2663 SEG6_LOCAL_ACTION_END_X))
2664 return 0;
2665 if (!nl_attr_put(
2666 &req->n, buflen,
2667 SEG6_LOCAL_NH6, &ctx->nh6,
2668 sizeof(struct in6_addr)))
2669 return 0;
2670 break;
2671 case SEG6_LOCAL_ACTION_END_T:
2672 if (!nl_attr_put32(
2673 &req->n, buflen,
2674 SEG6_LOCAL_ACTION,
2675 SEG6_LOCAL_ACTION_END_T))
2676 return 0;
2677 if (!nl_attr_put32(
2678 &req->n, buflen,
2679 SEG6_LOCAL_TABLE,
2680 ctx->table))
2681 return 0;
2682 break;
2683 case SEG6_LOCAL_ACTION_END_DX4:
2684 if (!nl_attr_put32(
2685 &req->n, buflen,
2686 SEG6_LOCAL_ACTION,
2687 SEG6_LOCAL_ACTION_END_DX4))
2688 return 0;
2689 if (!nl_attr_put(
2690 &req->n, buflen,
2691 SEG6_LOCAL_NH4, &ctx->nh4,
2692 sizeof(struct in_addr)))
2693 return 0;
2694 break;
2695 case SEG6_LOCAL_ACTION_END_DT6:
2696 if (!nl_attr_put32(
2697 &req->n, buflen,
2698 SEG6_LOCAL_ACTION,
2699 SEG6_LOCAL_ACTION_END_DT6))
2700 return 0;
2701 if (!nl_attr_put32(
2702 &req->n, buflen,
2703 SEG6_LOCAL_TABLE,
2704 ctx->table))
2705 return 0;
2706 break;
2707 case SEG6_LOCAL_ACTION_END_DT4:
2708 if (!nl_attr_put32(
2709 &req->n, buflen,
2710 SEG6_LOCAL_ACTION,
2711 SEG6_LOCAL_ACTION_END_DT4))
2712 return 0;
2713 if (!nl_attr_put32(
2714 &req->n, buflen,
2715 SEG6_LOCAL_VRFTABLE,
2716 ctx->table))
2717 return 0;
2718 break;
2719 case SEG6_LOCAL_ACTION_END_DT46:
2720 if (!nl_attr_put32(
2721 &req->n, buflen,
2722 SEG6_LOCAL_ACTION,
2723 SEG6_LOCAL_ACTION_END_DT46))
2724 return 0;
2725 if (!nl_attr_put32(
2726 &req->n, buflen,
2727 SEG6_LOCAL_VRFTABLE,
2728 ctx->table))
2729 return 0;
2730 break;
2731 default:
2732 zlog_err("%s: unsupport seg6local behaviour action=%u",
2733 __func__, action);
2734 return 0;
2735 }
2736 nl_attr_nest_end(&req->n, nest);
2737 }
2738
2739 if (!sid_zero(&nh->nh_srv6->seg6_segs)) {
2740 char tun_buf[4096];
2741 ssize_t tun_len;
2742 struct rtattr *nest;
2743
2744 if (!nl_attr_put16(&req->n, buflen,
2745 NHA_ENCAP_TYPE,
2746 LWTUNNEL_ENCAP_SEG6))
2747 return 0;
2748 nest = nl_attr_nest(&req->n, buflen,
2749 NHA_ENCAP | NLA_F_NESTED);
2750 if (!nest)
2751 return 0;
2752 tun_len = fill_seg6ipt_encap(tun_buf,
2753 sizeof(tun_buf),
2754 &nh->nh_srv6->seg6_segs);
2755 if (tun_len < 0)
2756 return 0;
2757 if (!nl_attr_put(&req->n, buflen,
2758 SEG6_IPTUNNEL_SRH,
2759 tun_buf, tun_len))
2760 return 0;
2761 nl_attr_nest_end(&req->n, nest);
2762 }
2763 }
2764
2765 nexthop_done:
2766
2767 if (IS_ZEBRA_DEBUG_KERNEL)
2768 zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ",
2769 __func__, id, nh, nh->ifindex,
2770 vrf_id_to_name(nh->vrf_id),
2771 nh->vrf_id, label_buf);
2772 }
2773
2774 req->nhm.nh_protocol = zebra2proto(type);
2775
2776 } else if (cmd != RTM_DELNEXTHOP) {
2777 flog_err(
2778 EC_ZEBRA_NHG_FIB_UPDATE,
2779 "Nexthop group kernel update command (%d) does not exist",
2780 cmd);
2781 return -1;
2782 }
2783
2784 if (IS_ZEBRA_DEBUG_KERNEL)
2785 zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd),
2786 id);
2787
2788 return NLMSG_ALIGN(req->n.nlmsg_len);
2789 }
2790
2791 static ssize_t netlink_nexthop_msg_encoder(struct zebra_dplane_ctx *ctx,
2792 void *buf, size_t buflen)
2793 {
2794 enum dplane_op_e op;
2795 int cmd = 0;
2796
2797 op = dplane_ctx_get_op(ctx);
2798 if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE)
2799 cmd = RTM_NEWNEXTHOP;
2800 else if (op == DPLANE_OP_NH_DELETE)
2801 cmd = RTM_DELNEXTHOP;
2802 else {
2803 flog_err(EC_ZEBRA_NHG_FIB_UPDATE,
2804 "Context received for kernel nexthop update with incorrect OP code (%u)",
2805 op);
2806 return -1;
2807 }
2808
2809 return netlink_nexthop_msg_encode(cmd, ctx, buf, buflen);
2810 }
2811
2812 enum netlink_msg_status
2813 netlink_put_nexthop_update_msg(struct nl_batch *bth,
2814 struct zebra_dplane_ctx *ctx)
2815 {
2816 /* Nothing to do if the kernel doesn't support nexthop objects */
2817 if (!kernel_nexthops_supported())
2818 return FRR_NETLINK_SUCCESS;
2819
2820 return netlink_batch_add_msg(bth, ctx, netlink_nexthop_msg_encoder,
2821 false);
2822 }
2823
2824 static ssize_t netlink_newroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2825 void *buf, size_t buflen)
2826 {
2827 return netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, buf,
2828 buflen, false, false);
2829 }
2830
2831 static ssize_t netlink_delroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2832 void *buf, size_t buflen)
2833 {
2834 return netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, buf,
2835 buflen, false, false);
2836 }
2837
2838 enum netlink_msg_status
2839 netlink_put_route_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
2840 {
2841 int cmd;
2842 const struct prefix *p = dplane_ctx_get_dest(ctx);
2843
2844 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
2845 cmd = RTM_DELROUTE;
2846 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
2847 cmd = RTM_NEWROUTE;
2848 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
2849
2850 if (p->family == AF_INET || v6_rr_semantics) {
2851 /* Single 'replace' operation */
2852
2853 /*
2854 * With route replace semantics in place
2855 * for v4 routes and the new route is a system
2856 * route we do not install anything.
2857 * The problem here is that the new system
2858 * route should cause us to withdraw from
2859 * the kernel the old non-system route
2860 */
2861 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))
2862 && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2863 return netlink_batch_add_msg(
2864 bth, ctx, netlink_delroute_msg_encoder,
2865 true);
2866 } else {
2867 /*
2868 * So v6 route replace semantics are not in
2869 * the kernel at this point as I understand it.
2870 * so let's do a delete then an add.
2871 * In the future once v6 route replace semantics
2872 * are in we can figure out what to do here to
2873 * allow working with old and new kernels.
2874 *
2875 * I'm also intentionally ignoring the failure case
2876 * of the route delete. If that happens yeah we're
2877 * screwed.
2878 */
2879 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2880 netlink_batch_add_msg(
2881 bth, ctx, netlink_delroute_msg_encoder,
2882 true);
2883 }
2884
2885 cmd = RTM_NEWROUTE;
2886 } else
2887 return FRR_NETLINK_ERROR;
2888
2889 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
2890 return FRR_NETLINK_SUCCESS;
2891
2892 return netlink_batch_add_msg(bth, ctx,
2893 cmd == RTM_NEWROUTE
2894 ? netlink_newroute_msg_encoder
2895 : netlink_delroute_msg_encoder,
2896 false);
2897 }
2898
2899 /**
2900 * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop
2901 *
2902 * @tb: Netlink RTA data
2903 * @family: Address family in the nhmsg
2904 * @ifp: Interface connected - this should be NULL, we fill it in
2905 * @ns_id: Namspace id
2906 *
2907 * Return: New nexthop
2908 */
2909 static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
2910 unsigned char family,
2911 struct interface **ifp,
2912 ns_id_t ns_id)
2913 {
2914 struct nexthop nh = {};
2915 void *gate = NULL;
2916 enum nexthop_types_t type = 0;
2917 int if_index = 0;
2918 size_t sz = 0;
2919 struct interface *ifp_lookup;
2920
2921 if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
2922
2923
2924 if (tb[NHA_GATEWAY]) {
2925 switch (family) {
2926 case AF_INET:
2927 type = NEXTHOP_TYPE_IPV4_IFINDEX;
2928 sz = 4;
2929 break;
2930 case AF_INET6:
2931 type = NEXTHOP_TYPE_IPV6_IFINDEX;
2932 sz = 16;
2933 break;
2934 default:
2935 flog_warn(
2936 EC_ZEBRA_BAD_NHG_MESSAGE,
2937 "Nexthop gateway with bad address family (%d) received from kernel",
2938 family);
2939 return nh;
2940 }
2941 gate = RTA_DATA(tb[NHA_GATEWAY]);
2942 } else
2943 type = NEXTHOP_TYPE_IFINDEX;
2944
2945 if (type)
2946 nh.type = type;
2947
2948 if (gate)
2949 memcpy(&(nh.gate), gate, sz);
2950
2951 if (if_index)
2952 nh.ifindex = if_index;
2953
2954 ifp_lookup =
2955 if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
2956
2957 if (ifp)
2958 *ifp = ifp_lookup;
2959 if (ifp_lookup)
2960 nh.vrf_id = ifp_lookup->vrf->vrf_id;
2961 else {
2962 flog_warn(
2963 EC_ZEBRA_UNKNOWN_INTERFACE,
2964 "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
2965 __func__, nh.ifindex);
2966
2967 nh.vrf_id = VRF_DEFAULT;
2968 }
2969
2970 if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
2971 uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
2972 int num_labels = 0;
2973
2974 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
2975
2976 if (encap_type == LWTUNNEL_ENCAP_MPLS)
2977 num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
2978
2979 if (num_labels)
2980 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
2981 labels);
2982 }
2983
2984 return nh;
2985 }
2986
2987 static int netlink_nexthop_process_group(struct rtattr **tb,
2988 struct nh_grp *z_grp, int z_grp_size)
2989 {
2990 uint8_t count = 0;
2991 /* linux/nexthop.h group struct */
2992 struct nexthop_grp *n_grp = NULL;
2993
2994 n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
2995 count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
2996
2997 if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
2998 flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
2999 "Invalid nexthop group received from the kernel");
3000 return count;
3001 }
3002
3003 for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
3004 z_grp[i].id = n_grp[i].id;
3005 z_grp[i].weight = n_grp[i].weight + 1;
3006 }
3007 return count;
3008 }
3009
3010 /**
3011 * netlink_nexthop_change() - Read in change about nexthops from the kernel
3012 *
3013 * @h: Netlink message header
3014 * @ns_id: Namspace id
3015 * @startup: Are we reading under startup conditions?
3016 *
3017 * Return: Result status
3018 */
3019 int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
3020 {
3021 int len;
3022 /* nexthop group id */
3023 uint32_t id;
3024 unsigned char family;
3025 int type;
3026 afi_t afi = AFI_UNSPEC;
3027 vrf_id_t vrf_id = VRF_DEFAULT;
3028 struct interface *ifp = NULL;
3029 struct nhmsg *nhm = NULL;
3030 struct nexthop nh = {};
3031 struct nh_grp grp[MULTIPATH_NUM] = {};
3032 /* Count of nexthops in group array */
3033 uint8_t grp_count = 0;
3034 struct rtattr *tb[NHA_MAX + 1] = {};
3035
3036 frrtrace(3, frr_zebra, netlink_nexthop_change, h, ns_id, startup);
3037
3038 nhm = NLMSG_DATA(h);
3039
3040 if (ns_id)
3041 vrf_id = ns_id;
3042
3043 if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
3044 return 0;
3045
3046 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
3047 if (len < 0) {
3048 zlog_warn(
3049 "%s: Message received from netlink is of a broken size %d %zu",
3050 __func__, h->nlmsg_len,
3051 (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
3052 return -1;
3053 }
3054
3055 netlink_parse_rtattr_flags(tb, NHA_MAX, RTM_NHA(nhm), len,
3056 NLA_F_NESTED);
3057
3058
3059 if (!tb[NHA_ID]) {
3060 flog_warn(
3061 EC_ZEBRA_BAD_NHG_MESSAGE,
3062 "Nexthop group without an ID received from the kernel");
3063 return -1;
3064 }
3065
3066 /* We use the ID key'd nhg table for kernel updates */
3067 id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
3068
3069 if (zebra_evpn_mh_is_fdb_nh(id)) {
3070 /* If this is a L2 NH just ignore it */
3071 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3072 zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x",
3073 h->nlmsg_type, id);
3074 }
3075 return 0;
3076 }
3077
3078 family = nhm->nh_family;
3079 afi = family2afi(family);
3080
3081 type = proto2zebra(nhm->nh_protocol, 0, true);
3082
3083 if (IS_ZEBRA_DEBUG_KERNEL)
3084 zlog_debug("%s ID (%u) %s NS %u",
3085 nl_msg_type_to_str(h->nlmsg_type), id,
3086 nl_family_to_str(family), ns_id);
3087
3088
3089 if (h->nlmsg_type == RTM_NEWNEXTHOP) {
3090 if (tb[NHA_GROUP]) {
3091 /**
3092 * If this is a group message its only going to have
3093 * an array of nexthop IDs associated with it
3094 */
3095 grp_count = netlink_nexthop_process_group(
3096 tb, grp, array_size(grp));
3097 } else {
3098 if (tb[NHA_BLACKHOLE]) {
3099 /**
3100 * This nexthop is just for blackhole-ing
3101 * traffic, it should not have an OIF, GATEWAY,
3102 * or ENCAP
3103 */
3104 nh.type = NEXTHOP_TYPE_BLACKHOLE;
3105 nh.bh_type = BLACKHOLE_UNSPEC;
3106 } else if (tb[NHA_OIF])
3107 /**
3108 * This is a true new nexthop, so we need
3109 * to parse the gateway and device info
3110 */
3111 nh = netlink_nexthop_process_nh(tb, family,
3112 &ifp, ns_id);
3113 else {
3114
3115 flog_warn(
3116 EC_ZEBRA_BAD_NHG_MESSAGE,
3117 "Invalid Nexthop message received from the kernel with ID (%u)",
3118 id);
3119 return -1;
3120 }
3121 SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
3122 if (nhm->nh_flags & RTNH_F_ONLINK)
3123 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
3124 vrf_id = nh.vrf_id;
3125 }
3126
3127 if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
3128 type, startup))
3129 return -1;
3130
3131 } else if (h->nlmsg_type == RTM_DELNEXTHOP)
3132 zebra_nhg_kernel_del(id, vrf_id);
3133
3134 return 0;
3135 }
3136
3137 /**
3138 * netlink_request_nexthop() - Request nextop information from the kernel
3139 * @zns: Zebra namespace
3140 * @family: AF_* netlink family
3141 * @type: RTM_* route type
3142 *
3143 * Return: Result status
3144 */
3145 static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
3146 {
3147 struct {
3148 struct nlmsghdr n;
3149 struct nhmsg nhm;
3150 } req;
3151
3152 /* Form the request, specifying filter (rtattr) if needed. */
3153 memset(&req, 0, sizeof(req));
3154 req.n.nlmsg_type = type;
3155 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3156 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3157 req.nhm.nh_family = family;
3158
3159 return netlink_request(&zns->netlink_cmd, &req);
3160 }
3161
3162
3163 /**
3164 * netlink_nexthop_read() - Nexthop read function using netlink interface
3165 *
3166 * @zns: Zebra name space
3167 *
3168 * Return: Result status
3169 * Only called at bootstrap time.
3170 */
3171 int netlink_nexthop_read(struct zebra_ns *zns)
3172 {
3173 int ret;
3174 struct zebra_dplane_info dp_info;
3175
3176 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3177
3178 /* Get nexthop objects */
3179 ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
3180 if (ret < 0)
3181 return ret;
3182 ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
3183 &dp_info, 0, true);
3184
3185 if (!ret)
3186 /* If we succesfully read in nexthop objects,
3187 * this kernel must support them.
3188 */
3189 supports_nh = true;
3190 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
3191 zlog_debug("Nexthop objects %ssupported on this kernel",
3192 supports_nh ? "" : "not ");
3193
3194 zebra_router_set_supports_nhgs(supports_nh);
3195
3196 return ret;
3197 }
3198
3199
3200 int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen,
3201 ns_id_t ns_id, uint8_t family, bool permanent)
3202 {
3203 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
3204 addr, lla, llalen, ns_id, family, permanent,
3205 RTPROT_ZEBRA);
3206 }
3207
3208 /**
3209 * netlink_neigh_update_msg_encode() - Common helper api for encoding
3210 * evpn neighbor update as netlink messages using dataplane context object.
3211 * Here, a neighbor refers to a bridge forwarding database entry for
3212 * either unicast forwarding or head-end replication or an IP neighbor
3213 * entry.
3214 * @ctx: Dataplane context
3215 * @cmd: Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH)
3216 * @lla: A pointer to neighbor cache link layer address
3217 * @llalen: Length of the pointer to neighbor cache link layer
3218 * address
3219 * @ip: A neighbor cache n/w layer destination address
3220 * In the case of bridge FDB, this represnts the remote
3221 * VTEP IP.
3222 * @replace_obj: Whether NEW request should replace existing object or
3223 * add to the end of the list
3224 * @family: AF_* netlink family
3225 * @type: RTN_* route type
3226 * @flags: NTF_* flags
3227 * @state: NUD_* states
3228 * @data: data buffer pointer
3229 * @datalen: total amount of data buffer space
3230 * @protocol: protocol information
3231 *
3232 * Return: 0 when the msg doesn't fit entirely in the buffer
3233 * otherwise the number of bytes written to buf.
3234 */
3235 static ssize_t netlink_neigh_update_msg_encode(
3236 const struct zebra_dplane_ctx *ctx, int cmd, const void *lla,
3237 int llalen, const struct ipaddr *ip, bool replace_obj, uint8_t family,
3238 uint8_t type, uint8_t flags, uint16_t state, uint32_t nhg_id, bool nfy,
3239 uint8_t nfy_flags, bool ext, uint32_t ext_flags, void *data,
3240 size_t datalen, uint8_t protocol)
3241 {
3242 struct {
3243 struct nlmsghdr n;
3244 struct ndmsg ndm;
3245 char buf[];
3246 } *req = data;
3247 int ipa_len;
3248 enum dplane_op_e op;
3249
3250 if (datalen < sizeof(*req))
3251 return 0;
3252 memset(req, 0, sizeof(*req));
3253
3254 op = dplane_ctx_get_op(ctx);
3255
3256 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3257 req->n.nlmsg_flags = NLM_F_REQUEST;
3258 if (cmd == RTM_NEWNEIGH)
3259 req->n.nlmsg_flags |=
3260 NLM_F_CREATE
3261 | (replace_obj ? NLM_F_REPLACE : NLM_F_APPEND);
3262 req->n.nlmsg_type = cmd;
3263 req->ndm.ndm_family = family;
3264 req->ndm.ndm_type = type;
3265 req->ndm.ndm_state = state;
3266 req->ndm.ndm_flags = flags;
3267 req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
3268
3269 if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol,
3270 sizeof(protocol)))
3271 return 0;
3272
3273 if (lla) {
3274 if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, lla, llalen))
3275 return 0;
3276 }
3277
3278 if (nfy) {
3279 struct rtattr *nest;
3280
3281 nest = nl_attr_nest(&req->n, datalen,
3282 NDA_FDB_EXT_ATTRS | NLA_F_NESTED);
3283 if (!nest)
3284 return 0;
3285
3286 if (!nl_attr_put(&req->n, datalen, NFEA_ACTIVITY_NOTIFY,
3287 &nfy_flags, sizeof(nfy_flags)))
3288 return 0;
3289 if (!nl_attr_put(&req->n, datalen, NFEA_DONT_REFRESH, NULL, 0))
3290 return 0;
3291
3292 nl_attr_nest_end(&req->n, nest);
3293 }
3294
3295
3296 if (ext) {
3297 if (!nl_attr_put(&req->n, datalen, NDA_EXT_FLAGS, &ext_flags,
3298 sizeof(ext_flags)))
3299 return 0;
3300 }
3301
3302 if (nhg_id) {
3303 if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id))
3304 return 0;
3305 } else {
3306 ipa_len =
3307 IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
3308 if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr,
3309 ipa_len))
3310 return 0;
3311 }
3312
3313 if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) {
3314 vlanid_t vid = dplane_ctx_mac_get_vlan(ctx);
3315
3316 if (vid > 0) {
3317 if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid))
3318 return 0;
3319 }
3320
3321 if (!nl_attr_put32(&req->n, datalen, NDA_MASTER,
3322 dplane_ctx_mac_get_br_ifindex(ctx)))
3323 return 0;
3324 }
3325
3326 return NLMSG_ALIGN(req->n.nlmsg_len);
3327 }
3328
3329 /*
3330 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
3331 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
3332 */
3333 static ssize_t
3334 netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd,
3335 void *buf, size_t buflen)
3336 {
3337 struct ethaddr dst_mac = {.octet = {0}};
3338 int proto = RTPROT_ZEBRA;
3339
3340 if (dplane_ctx_get_type(ctx) != 0)
3341 proto = zebra2proto(dplane_ctx_get_type(ctx));
3342
3343 return netlink_neigh_update_msg_encode(
3344 ctx, cmd, (const void *)&dst_mac, ETH_ALEN,
3345 dplane_ctx_neigh_get_ipaddr(ctx), false, PF_BRIDGE, 0, NTF_SELF,
3346 (NUD_NOARP | NUD_PERMANENT), 0 /*nhg*/, false /*nfy*/,
3347 0 /*nfy_flags*/, false /*ext*/, 0 /*ext_flags*/, buf, buflen,
3348 proto);
3349 }
3350
3351 #ifndef NDA_RTA
3352 #define NDA_RTA(r) \
3353 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
3354 #endif
3355
3356 static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
3357 {
3358 struct ndmsg *ndm;
3359 struct interface *ifp;
3360 struct zebra_if *zif;
3361 struct rtattr *tb[NDA_MAX + 1];
3362 struct interface *br_if;
3363 struct ethaddr mac;
3364 vlanid_t vid = 0;
3365 struct in_addr vtep_ip;
3366 int vid_present = 0, dst_present = 0;
3367 char vid_buf[20];
3368 char dst_buf[30];
3369 bool sticky;
3370 bool local_inactive = false;
3371 bool dp_static = false;
3372 uint32_t nhg_id = 0;
3373
3374 ndm = NLMSG_DATA(h);
3375
3376 /* We only process macfdb notifications if EVPN is enabled */
3377 if (!is_evpn_enabled())
3378 return 0;
3379
3380 /* Parse attributes and extract fields of interest. Do basic
3381 * validation of the fields.
3382 */
3383 netlink_parse_rtattr_flags(tb, NDA_MAX, NDA_RTA(ndm), len,
3384 NLA_F_NESTED);
3385
3386 if (!tb[NDA_LLADDR]) {
3387 if (IS_ZEBRA_DEBUG_KERNEL)
3388 zlog_debug("%s AF_BRIDGE IF %u - no LLADDR",
3389 nl_msg_type_to_str(h->nlmsg_type),
3390 ndm->ndm_ifindex);
3391 return 0;
3392 }
3393
3394 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
3395 if (IS_ZEBRA_DEBUG_KERNEL)
3396 zlog_debug(
3397 "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu",
3398 nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex,
3399 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
3400 return 0;
3401 }
3402
3403 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
3404
3405 if (tb[NDA_VLAN]) {
3406 vid_present = 1;
3407 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
3408 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
3409 }
3410
3411 if (tb[NDA_DST]) {
3412 /* TODO: Only IPv4 supported now. */
3413 dst_present = 1;
3414 memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]),
3415 IPV4_MAX_BYTELEN);
3416 snprintfrr(dst_buf, sizeof(dst_buf), " dst %pI4",
3417 &vtep_ip);
3418 }
3419
3420 if (tb[NDA_NH_ID])
3421 nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]);
3422
3423 if (ndm->ndm_state & NUD_STALE)
3424 local_inactive = true;
3425
3426 if (tb[NDA_FDB_EXT_ATTRS]) {
3427 struct rtattr *attr = tb[NDA_FDB_EXT_ATTRS];
3428 struct rtattr *nfea_tb[NFEA_MAX + 1] = {0};
3429
3430 netlink_parse_rtattr_nested(nfea_tb, NFEA_MAX, attr);
3431 if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) {
3432 uint8_t nfy_flags;
3433
3434 nfy_flags = *(uint8_t *)RTA_DATA(
3435 nfea_tb[NFEA_ACTIVITY_NOTIFY]);
3436 if (nfy_flags & FDB_NOTIFY_BIT)
3437 dp_static = true;
3438 if (nfy_flags & FDB_NOTIFY_INACTIVE_BIT)
3439 local_inactive = true;
3440 }
3441 }
3442
3443 if (IS_ZEBRA_DEBUG_KERNEL)
3444 zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %pEA%s nhg %d",
3445 nl_msg_type_to_str(h->nlmsg_type),
3446 ndm->ndm_ifindex, vid_present ? vid_buf : "",
3447 ndm->ndm_state, ndm->ndm_flags, &mac,
3448 dst_present ? dst_buf : "", nhg_id);
3449
3450 /* The interface should exist. */
3451 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3452 ndm->ndm_ifindex);
3453 if (!ifp || !ifp->info)
3454 return 0;
3455
3456 /* The interface should be something we're interested in. */
3457 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
3458 return 0;
3459
3460 zif = (struct zebra_if *)ifp->info;
3461 if ((br_if = zif->brslave_info.br_if) == NULL) {
3462 if (IS_ZEBRA_DEBUG_KERNEL)
3463 zlog_debug(
3464 "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master",
3465 nl_msg_type_to_str(h->nlmsg_type), ifp->name,
3466 ndm->ndm_ifindex,
3467 zif->brslave_info.bridge_ifindex);
3468 return 0;
3469 }
3470
3471 sticky = !!(ndm->ndm_flags & NTF_STICKY);
3472
3473 if (filter_vlan && vid != filter_vlan) {
3474 if (IS_ZEBRA_DEBUG_KERNEL)
3475 zlog_debug(" Filtered due to filter vlan: %d",
3476 filter_vlan);
3477 return 0;
3478 }
3479
3480 /* If add or update, do accordingly if learnt on a "local" interface; if
3481 * the notification is over VxLAN, this has to be related to
3482 * multi-homing,
3483 * so perform an implicit delete of any local entry (if it exists).
3484 */
3485 if (h->nlmsg_type == RTM_NEWNEIGH) {
3486 /* Drop "permanent" entries. */
3487 if (ndm->ndm_state & NUD_PERMANENT) {
3488 if (IS_ZEBRA_DEBUG_KERNEL)
3489 zlog_debug(
3490 " Dropping entry because of NUD_PERMANENT");
3491 return 0;
3492 }
3493
3494 if (IS_ZEBRA_IF_VXLAN(ifp))
3495 return zebra_vxlan_dp_network_mac_add(
3496 ifp, br_if, &mac, vid, nhg_id, sticky,
3497 !!(ndm->ndm_flags & NTF_EXT_LEARNED));
3498
3499 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
3500 sticky, local_inactive, dp_static);
3501 }
3502
3503 /* This is a delete notification.
3504 * Ignore the notification with IP dest as it may just signify that the
3505 * MAC has moved from remote to local. The exception is the special
3506 * all-zeros MAC that represents the BUM flooding entry; we may have
3507 * to readd it. Otherwise,
3508 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
3509 * 2. For a MAC over "local" interface, delete the mac
3510 * Note: We will get notifications from both bridge driver and VxLAN
3511 * driver.
3512 */
3513 if (nhg_id)
3514 return 0;
3515
3516 if (dst_present) {
3517 u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
3518
3519 if (!memcmp(zero_mac, mac.octet, ETH_ALEN))
3520 return zebra_vxlan_check_readd_vtep(ifp, vtep_ip);
3521 return 0;
3522 }
3523
3524 if (IS_ZEBRA_IF_VXLAN(ifp))
3525 return zebra_vxlan_dp_network_mac_del(ifp, br_if, &mac, vid);
3526
3527 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
3528 }
3529
3530 static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
3531 {
3532 int len;
3533 struct ndmsg *ndm;
3534
3535 if (h->nlmsg_type != RTM_NEWNEIGH)
3536 return 0;
3537
3538 /* Length validity. */
3539 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3540 if (len < 0)
3541 return -1;
3542
3543 /* We are interested only in AF_BRIDGE notifications. */
3544 ndm = NLMSG_DATA(h);
3545 if (ndm->ndm_family != AF_BRIDGE)
3546 return 0;
3547
3548 return netlink_macfdb_change(h, len, ns_id);
3549 }
3550
3551 /* Request for MAC FDB information from the kernel */
3552 static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
3553 int type, ifindex_t master_ifindex)
3554 {
3555 struct {
3556 struct nlmsghdr n;
3557 struct ifinfomsg ifm;
3558 char buf[256];
3559 } req;
3560
3561 /* Form the request, specifying filter (rtattr) if needed. */
3562 memset(&req, 0, sizeof(req));
3563 req.n.nlmsg_type = type;
3564 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3565 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
3566 req.ifm.ifi_family = family;
3567 if (master_ifindex)
3568 nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
3569
3570 return netlink_request(netlink_cmd, &req);
3571 }
3572
3573 /*
3574 * MAC forwarding database read using netlink interface. This is invoked
3575 * at startup.
3576 */
3577 int netlink_macfdb_read(struct zebra_ns *zns)
3578 {
3579 int ret;
3580 struct zebra_dplane_info dp_info;
3581
3582 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3583
3584 /* Get bridge FDB table. */
3585 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3586 0);
3587 if (ret < 0)
3588 return ret;
3589 /* We are reading entire table. */
3590 filter_vlan = 0;
3591 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3592 &dp_info, 0, true);
3593
3594 return ret;
3595 }
3596
3597 /*
3598 * MAC forwarding database read using netlink interface. This is for a
3599 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
3600 */
3601 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
3602 struct interface *br_if)
3603 {
3604 struct zebra_if *br_zif;
3605 struct zebra_if *zif;
3606 struct zebra_l2info_vxlan *vxl;
3607 struct zebra_dplane_info dp_info;
3608 int ret = 0;
3609
3610 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3611
3612 /* Save VLAN we're filtering on, if needed. */
3613 br_zif = (struct zebra_if *)br_if->info;
3614 zif = (struct zebra_if *)ifp->info;
3615 vxl = &zif->l2info.vxl;
3616 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
3617 filter_vlan = vxl->access_vlan;
3618
3619 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3620 */
3621 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3622 br_if->ifindex);
3623 if (ret < 0)
3624 return ret;
3625 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3626 &dp_info, 0, false);
3627
3628 /* Reset VLAN filter. */
3629 filter_vlan = 0;
3630 return ret;
3631 }
3632
3633
3634 /* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
3635 static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
3636 int family, int type,
3637 struct interface *br_if,
3638 const struct ethaddr *mac,
3639 vlanid_t vid)
3640 {
3641 struct {
3642 struct nlmsghdr n;
3643 struct ndmsg ndm;
3644 char buf[256];
3645 } req;
3646 struct zebra_if *br_zif;
3647
3648 memset(&req, 0, sizeof(req));
3649 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3650 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3651 req.n.nlmsg_flags = NLM_F_REQUEST;
3652 req.ndm.ndm_family = family; /* AF_BRIDGE */
3653 /* req.ndm.ndm_state = NUD_REACHABLE; */
3654
3655 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
3656
3657 br_zif = (struct zebra_if *)br_if->info;
3658 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
3659 nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid);
3660
3661 nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
3662
3663 if (IS_ZEBRA_DEBUG_KERNEL)
3664 zlog_debug(
3665 "%s: Tx family %s IF %s(%u) vrf %s(%u) MAC %pEA vid %u",
3666 __func__, nl_family_to_str(req.ndm.ndm_family),
3667 br_if->name, br_if->ifindex, br_if->vrf->name,
3668 br_if->vrf->vrf_id, mac, vid);
3669
3670 return netlink_request(&zns->netlink_cmd, &req);
3671 }
3672
3673 int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
3674 struct interface *br_if,
3675 const struct ethaddr *mac, vlanid_t vid)
3676 {
3677 int ret = 0;
3678 struct zebra_dplane_info dp_info;
3679
3680 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3681
3682 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3683 */
3684 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
3685 RTM_GETNEIGH,
3686 br_if, mac, vid);
3687 if (ret < 0)
3688 return ret;
3689
3690 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3691 &dp_info, 1, false);
3692
3693 return ret;
3694 }
3695
3696 /*
3697 * Netlink-specific handler for MAC updates using dataplane context object.
3698 */
3699 ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, void *data,
3700 size_t datalen)
3701 {
3702 struct ipaddr vtep_ip;
3703 vlanid_t vid;
3704 ssize_t total;
3705 int cmd;
3706 uint8_t flags;
3707 uint16_t state;
3708 uint32_t nhg_id;
3709 uint32_t update_flags;
3710 bool nfy = false;
3711 uint8_t nfy_flags = 0;
3712 int proto = RTPROT_ZEBRA;
3713
3714 if (dplane_ctx_get_type(ctx) != 0)
3715 proto = zebra2proto(dplane_ctx_get_type(ctx));
3716
3717 cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL
3718 ? RTM_NEWNEIGH : RTM_DELNEIGH;
3719
3720 flags = NTF_MASTER;
3721 state = NUD_REACHABLE;
3722
3723 update_flags = dplane_ctx_mac_get_update_flags(ctx);
3724 if (update_flags & DPLANE_MAC_REMOTE) {
3725 flags |= NTF_SELF;
3726 if (dplane_ctx_mac_is_sticky(ctx)) {
3727 /* NUD_NOARP prevents the entry from expiring */
3728 state |= NUD_NOARP;
3729 /* sticky the entry from moving */
3730 flags |= NTF_STICKY;
3731 } else {
3732 flags |= NTF_EXT_LEARNED;
3733 }
3734 /* if it was static-local previously we need to clear the
3735 * notify flags on replace with remote
3736 */
3737 if (update_flags & DPLANE_MAC_WAS_STATIC)
3738 nfy = true;
3739 } else {
3740 /* local mac */
3741 if (update_flags & DPLANE_MAC_SET_STATIC) {
3742 nfy_flags |= FDB_NOTIFY_BIT;
3743 state |= NUD_NOARP;
3744 }
3745
3746 if (update_flags & DPLANE_MAC_SET_INACTIVE)
3747 nfy_flags |= FDB_NOTIFY_INACTIVE_BIT;
3748
3749 nfy = true;
3750 }
3751
3752 nhg_id = dplane_ctx_mac_get_nhg_id(ctx);
3753 vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx));
3754 SET_IPADDR_V4(&vtep_ip);
3755
3756 if (IS_ZEBRA_DEBUG_KERNEL) {
3757 char vid_buf[20];
3758 const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
3759
3760 vid = dplane_ctx_mac_get_vlan(ctx);
3761 if (vid > 0)
3762 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
3763 else
3764 vid_buf[0] = '\0';
3765
3766 zlog_debug(
3767 "Tx %s family %s IF %s(%u)%s %sMAC %pEA dst %pIA nhg %u%s%s%s%s%s",
3768 nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE),
3769 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
3770 vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
3771 mac, &vtep_ip, nhg_id,
3772 (update_flags & DPLANE_MAC_REMOTE) ? " rem" : "",
3773 (update_flags & DPLANE_MAC_WAS_STATIC) ? " clr_sync"
3774 : "",
3775 (update_flags & DPLANE_MAC_SET_STATIC) ? " static" : "",
3776 (update_flags & DPLANE_MAC_SET_INACTIVE) ? " inactive"
3777 : "",
3778 nfy ? " nfy" : "");
3779 }
3780
3781 total = netlink_neigh_update_msg_encode(
3782 ctx, cmd, (const void *)dplane_ctx_mac_get_addr(ctx), ETH_ALEN,
3783 &vtep_ip, true, AF_BRIDGE, 0, flags, state, nhg_id, nfy,
3784 nfy_flags, false /*ext*/, 0 /*ext_flags*/, data, datalen,
3785 proto);
3786
3787 return total;
3788 }
3789
3790 /*
3791 * In the event the kernel deletes ipv4 link-local neighbor entries created for
3792 * 5549 support, re-install them.
3793 */
3794 static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
3795 struct interface *ifp, struct ipaddr *ip,
3796 bool handle_failed)
3797 {
3798 if (ndm->ndm_family != AF_INET)
3799 return;
3800
3801 if (!zif->v6_2_v4_ll_neigh_entry)
3802 return;
3803
3804 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
3805 return;
3806
3807 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
3808 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
3809 ifp->name);
3810 return;
3811 }
3812
3813 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
3814 }
3815
/* Neighbor states for which netlink_ipneigh_change() processes an
 * RTM_NEWNEIGH as an add/update rather than as a delete.
 */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE     \
	 | NUD_DELAY)
/* Subset of states tested by netlink_ipneigh_change() to decide whether a
 * neighbor is locally active.
 */
#define NUD_LOCAL_ACTIVE                                                       \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE)
3821
/*
 * Translate a netlink neighbor state into the zclient neighbor state.
 * The two value sets (NUD_XXX in linux/neighbour.h and
 * ZEBRA_NEIGH_STATE_XXX in lib/zclient.h) are matched exactly, so the
 * translation is the identity.
 */
static int netlink_nbr_entry_state_to_zclient(int nbr_state)
{
	int zclient_state = nbr_state;

	return zclient_state;
}
3831 static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
3832 {
3833 struct ndmsg *ndm;
3834 struct interface *ifp;
3835 struct zebra_if *zif;
3836 struct rtattr *tb[NDA_MAX + 1];
3837 struct interface *link_if;
3838 struct ethaddr mac;
3839 struct ipaddr ip;
3840 char buf[ETHER_ADDR_STRLEN];
3841 int mac_present = 0;
3842 bool is_ext;
3843 bool is_router;
3844 bool local_inactive;
3845 uint32_t ext_flags = 0;
3846 bool dp_static = false;
3847 int l2_len = 0;
3848 int cmd;
3849
3850 ndm = NLMSG_DATA(h);
3851
3852 /* The interface should exist. */
3853 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3854 ndm->ndm_ifindex);
3855 if (!ifp || !ifp->info)
3856 return 0;
3857
3858 zif = (struct zebra_if *)ifp->info;
3859
3860 /* Parse attributes and extract fields of interest. */
3861 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
3862
3863 if (!tb[NDA_DST]) {
3864 zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST",
3865 nl_msg_type_to_str(h->nlmsg_type),
3866 nl_family_to_str(ndm->ndm_family), ifp->name,
3867 ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id);
3868 return 0;
3869 }
3870
3871 memset(&ip, 0, sizeof(ip));
3872 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
3873 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
3874
3875 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
3876 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
3877 netlink_handle_5549(ndm, zif, ifp, &ip, false);
3878 if (IS_ZEBRA_DEBUG_KERNEL)
3879 zlog_debug(
3880 " Neighbor Entry Received is a 5549 entry, finished");
3881 return 0;
3882 }
3883
3884 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
3885 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
3886 netlink_handle_5549(ndm, zif, ifp, &ip, true);
3887
3888 /* we send link layer information to client:
3889 * - nlmsg_type = RTM_DELNEIGH|NEWNEIGH|GETNEIGH
3890 * - struct ipaddr ( for DEL and GET)
3891 * - struct ethaddr mac; (for NEW)
3892 */
3893 if (h->nlmsg_type == RTM_NEWNEIGH)
3894 cmd = ZEBRA_NHRP_NEIGH_ADDED;
3895 else if (h->nlmsg_type == RTM_GETNEIGH)
3896 cmd = ZEBRA_NHRP_NEIGH_GET;
3897 else if (h->nlmsg_type == RTM_DELNEIGH)
3898 cmd = ZEBRA_NHRP_NEIGH_REMOVED;
3899 else {
3900 zlog_debug("%s(): unknown nlmsg type %u", __func__,
3901 h->nlmsg_type);
3902 return 0;
3903 }
3904 if (tb[NDA_LLADDR]) {
3905 /* copy LLADDR information */
3906 l2_len = RTA_PAYLOAD(tb[NDA_LLADDR]);
3907 }
3908 if (l2_len == IPV4_MAX_BYTELEN || l2_len == 0) {
3909 union sockunion link_layer_ipv4;
3910
3911 if (l2_len) {
3912 sockunion_family(&link_layer_ipv4) = AF_INET;
3913 memcpy((void *)sockunion_get_addr(&link_layer_ipv4),
3914 RTA_DATA(tb[NDA_LLADDR]), l2_len);
3915 } else
3916 sockunion_family(&link_layer_ipv4) = AF_UNSPEC;
3917 zsend_nhrp_neighbor_notify(
3918 cmd, ifp, &ip,
3919 netlink_nbr_entry_state_to_zclient(ndm->ndm_state),
3920 &link_layer_ipv4);
3921 }
3922
3923 if (h->nlmsg_type == RTM_GETNEIGH)
3924 return 0;
3925
3926 /* The neighbor is present on an SVI. From this, we locate the
3927 * underlying
3928 * bridge because we're only interested in neighbors on a VxLAN bridge.
3929 * The bridge is located based on the nature of the SVI:
3930 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
3931 * interface
3932 * and is linked to the bridge
3933 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
3934 * interface
3935 * itself
3936 */
3937 if (IS_ZEBRA_IF_VLAN(ifp)) {
3938 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3939 zif->link_ifindex);
3940 if (!link_if)
3941 return 0;
3942 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
3943 link_if = ifp;
3944 else {
3945 link_if = NULL;
3946 if (IS_ZEBRA_DEBUG_KERNEL)
3947 zlog_debug(
3948 " Neighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
3949 }
3950
3951 memset(&mac, 0, sizeof(mac));
3952 if (h->nlmsg_type == RTM_NEWNEIGH) {
3953 if (tb[NDA_LLADDR]) {
3954 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
3955 if (IS_ZEBRA_DEBUG_KERNEL)
3956 zlog_debug(
3957 "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu",
3958 nl_msg_type_to_str(
3959 h->nlmsg_type),
3960 nl_family_to_str(
3961 ndm->ndm_family),
3962 ifp->name, ndm->ndm_ifindex,
3963 ifp->vrf->name,
3964 ifp->vrf->vrf_id,
3965 (unsigned long)RTA_PAYLOAD(
3966 tb[NDA_LLADDR]));
3967 return 0;
3968 }
3969
3970 mac_present = 1;
3971 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
3972 }
3973
3974 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
3975 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
3976
3977 if (tb[NDA_EXT_FLAGS]) {
3978 ext_flags = *(uint32_t *)RTA_DATA(tb[NDA_EXT_FLAGS]);
3979 if (ext_flags & NTF_E_MH_PEER_SYNC)
3980 dp_static = true;
3981 }
3982
3983 if (IS_ZEBRA_DEBUG_KERNEL)
3984 zlog_debug(
3985 "Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA MAC %s state 0x%x flags 0x%x ext_flags 0x%x",
3986 nl_msg_type_to_str(h->nlmsg_type),
3987 nl_family_to_str(ndm->ndm_family), ifp->name,
3988 ndm->ndm_ifindex, ifp->vrf->name,
3989 ifp->vrf->vrf_id, &ip,
3990 mac_present
3991 ? prefix_mac2str(&mac, buf, sizeof(buf))
3992 : "",
3993 ndm->ndm_state, ndm->ndm_flags, ext_flags);
3994
3995 /* If the neighbor state is valid for use, process as an add or
3996 * update
3997 * else process as a delete. Note that the delete handling may
3998 * result
3999 * in re-adding the neighbor if it is a valid "remote" neighbor.
4000 */
4001 if (ndm->ndm_state & NUD_VALID) {
4002 if (zebra_evpn_mh_do_adv_reachable_neigh_only())
4003 local_inactive =
4004 !(ndm->ndm_state & NUD_LOCAL_ACTIVE);
4005 else
4006 /* If EVPN-MH is not enabled we treat STALE
4007 * neighbors as locally-active and advertise
4008 * them
4009 */
4010 local_inactive = false;
4011
4012 /* Add local neighbors to the l3 interface database */
4013 if (is_ext)
4014 zebra_neigh_del(ifp, &ip);
4015 else
4016 zebra_neigh_add(ifp, &ip, &mac);
4017
4018 if (link_if)
4019 zebra_vxlan_handle_kernel_neigh_update(
4020 ifp, link_if, &ip, &mac, ndm->ndm_state,
4021 is_ext, is_router, local_inactive,
4022 dp_static);
4023 return 0;
4024 }
4025
4026
4027 zebra_neigh_del(ifp, &ip);
4028 if (link_if)
4029 zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
4030 return 0;
4031 }
4032
4033 if (IS_ZEBRA_DEBUG_KERNEL)
4034 zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA",
4035 nl_msg_type_to_str(h->nlmsg_type),
4036 nl_family_to_str(ndm->ndm_family), ifp->name,
4037 ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id,
4038 &ip);
4039
4040 /* Process the delete - it may result in re-adding the neighbor if it is
4041 * a valid "remote" neighbor.
4042 */
4043 zebra_neigh_del(ifp, &ip);
4044 if (link_if)
4045 zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
4046
4047 return 0;
4048 }
4049
4050 static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
4051 {
4052 int len;
4053 struct ndmsg *ndm;
4054
4055 if (h->nlmsg_type != RTM_NEWNEIGH)
4056 return 0;
4057
4058 /* Length validity. */
4059 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
4060 if (len < 0)
4061 return -1;
4062
4063 /* We are interested only in AF_INET or AF_INET6 notifications. */
4064 ndm = NLMSG_DATA(h);
4065 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
4066 return 0;
4067
4068 return netlink_neigh_change(h, len);
4069 }
4070
4071 /* Request for IP neighbor information from the kernel */
4072 static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
4073 int type, ifindex_t ifindex)
4074 {
4075 struct {
4076 struct nlmsghdr n;
4077 struct ndmsg ndm;
4078 char buf[256];
4079 } req;
4080
4081 /* Form the request, specifying filter (rtattr) if needed. */
4082 memset(&req, 0, sizeof(req));
4083 req.n.nlmsg_type = type;
4084 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
4085 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
4086 req.ndm.ndm_family = family;
4087 if (ifindex)
4088 nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
4089
4090 return netlink_request(netlink_cmd, &req);
4091 }
4092
4093 /*
4094 * IP Neighbor table read using netlink interface. This is invoked
4095 * at startup.
4096 */
4097 int netlink_neigh_read(struct zebra_ns *zns)
4098 {
4099 int ret;
4100 struct zebra_dplane_info dp_info;
4101
4102 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4103
4104 /* Get IP neighbor table. */
4105 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
4106 0);
4107 if (ret < 0)
4108 return ret;
4109 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4110 &dp_info, 0, true);
4111
4112 return ret;
4113 }
4114
4115 /*
4116 * IP Neighbor table read using netlink interface. This is for a specific
4117 * VLAN device.
4118 */
4119 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
4120 {
4121 int ret = 0;
4122 struct zebra_dplane_info dp_info;
4123
4124 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4125
4126 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
4127 vlan_if->ifindex);
4128 if (ret < 0)
4129 return ret;
4130 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4131 &dp_info, 0, false);
4132
4133 return ret;
4134 }
4135
4136 /*
4137 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
4138 * read using netlink interface.
4139 */
4140 static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
4141 int type,
4142 const struct ipaddr *ip,
4143 ifindex_t ifindex)
4144 {
4145 struct {
4146 struct nlmsghdr n;
4147 struct ndmsg ndm;
4148 char buf[256];
4149 } req;
4150 int ipa_len;
4151
4152 /* Form the request, specifying filter (rtattr) if needed. */
4153 memset(&req, 0, sizeof(req));
4154 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
4155 req.n.nlmsg_flags = NLM_F_REQUEST;
4156 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
4157 req.ndm.ndm_ifindex = ifindex;
4158
4159 if (IS_IPADDR_V4(ip)) {
4160 ipa_len = IPV4_MAX_BYTELEN;
4161 req.ndm.ndm_family = AF_INET;
4162
4163 } else {
4164 ipa_len = IPV6_MAX_BYTELEN;
4165 req.ndm.ndm_family = AF_INET6;
4166 }
4167
4168 nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
4169
4170 if (IS_ZEBRA_DEBUG_KERNEL)
4171 zlog_debug("%s: Tx %s family %s IF %u IP %pIA flags 0x%x",
4172 __func__, nl_msg_type_to_str(type),
4173 nl_family_to_str(req.ndm.ndm_family), ifindex, ip,
4174 req.n.nlmsg_flags);
4175
4176 return netlink_request(&zns->netlink_cmd, &req);
4177 }
4178
4179 int netlink_neigh_read_specific_ip(const struct ipaddr *ip,
4180 struct interface *vlan_if)
4181 {
4182 int ret = 0;
4183 struct zebra_ns *zns;
4184 struct zebra_vrf *zvrf = vlan_if->vrf->info;
4185 struct zebra_dplane_info dp_info;
4186
4187 zns = zvrf->zns;
4188
4189 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4190
4191 if (IS_ZEBRA_DEBUG_KERNEL)
4192 zlog_debug("%s: neigh request IF %s(%u) IP %pIA vrf %s(%u)",
4193 __func__, vlan_if->name, vlan_if->ifindex, ip,
4194 vlan_if->vrf->name, vlan_if->vrf->vrf_id);
4195
4196 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
4197 vlan_if->ifindex);
4198 if (ret < 0)
4199 return ret;
4200
4201 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4202 &dp_info, 1, false);
4203
4204 return ret;
4205 }
4206
4207 int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
4208 {
4209 int len;
4210 struct ndmsg *ndm;
4211
4212 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH
4213 || h->nlmsg_type == RTM_GETNEIGH))
4214 return 0;
4215
4216 /* Length validity. */
4217 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
4218 if (len < 0) {
4219 zlog_err(
4220 "%s: Message received from netlink is of a broken size %d %zu",
4221 __func__, h->nlmsg_len,
4222 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
4223 return -1;
4224 }
4225
4226 /* Is this a notification for the MAC FDB or IP neighbor table? */
4227 ndm = NLMSG_DATA(h);
4228 if (ndm->ndm_family == AF_BRIDGE)
4229 return netlink_macfdb_change(h, len, ns_id);
4230
4231 if (ndm->ndm_type != RTN_UNICAST)
4232 return 0;
4233
4234 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
4235 return netlink_ipneigh_change(h, len, ns_id);
4236 else {
4237 flog_warn(
4238 EC_ZEBRA_UNKNOWN_FAMILY,
4239 "Invalid address family: %u received from kernel neighbor change: %s",
4240 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
4241 return 0;
4242 }
4243
4244 return 0;
4245 }
4246
4247 /*
4248 * Utility neighbor-update function, using info from dplane context.
4249 */
4250 static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
4251 int cmd, void *buf, size_t buflen)
4252 {
4253 const struct ipaddr *ip;
4254 const struct ethaddr *mac = NULL;
4255 const struct ipaddr *link_ip = NULL;
4256 const void *link_ptr = NULL;
4257 char buf2[ETHER_ADDR_STRLEN];
4258
4259 int llalen;
4260 uint8_t flags;
4261 uint16_t state;
4262 uint8_t family;
4263 uint32_t update_flags;
4264 uint32_t ext_flags = 0;
4265 bool ext = false;
4266 int proto = RTPROT_ZEBRA;
4267
4268 if (dplane_ctx_get_type(ctx) != 0)
4269 proto = zebra2proto(dplane_ctx_get_type(ctx));
4270
4271 ip = dplane_ctx_neigh_get_ipaddr(ctx);
4272
4273 if (dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_INSTALL
4274 || dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_DELETE) {
4275 link_ip = dplane_ctx_neigh_get_link_ip(ctx);
4276 llalen = IPADDRSZ(link_ip);
4277 link_ptr = (const void *)&(link_ip->ip.addr);
4278 ipaddr2str(link_ip, buf2, sizeof(buf2));
4279 } else {
4280 mac = dplane_ctx_neigh_get_mac(ctx);
4281 llalen = ETH_ALEN;
4282 link_ptr = (const void *)mac;
4283 if (is_zero_mac(mac))
4284 mac = NULL;
4285 if (mac)
4286 prefix_mac2str(mac, buf2, sizeof(buf2));
4287 else
4288 snprintf(buf2, sizeof(buf2), "null");
4289 }
4290 update_flags = dplane_ctx_neigh_get_update_flags(ctx);
4291 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
4292 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
4293
4294 family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
4295
4296 if (update_flags & DPLANE_NEIGH_REMOTE) {
4297 flags |= NTF_EXT_LEARNED;
4298 /* if it was static-local previously we need to clear the
4299 * ext flags on replace with remote
4300 */
4301 if (update_flags & DPLANE_NEIGH_WAS_STATIC)
4302 ext = true;
4303 } else if (!(update_flags & DPLANE_NEIGH_NO_EXTENSION)) {
4304 ext = true;
4305 /* local neigh */
4306 if (update_flags & DPLANE_NEIGH_SET_STATIC)
4307 ext_flags |= NTF_E_MH_PEER_SYNC;
4308 }
4309 if (IS_ZEBRA_DEBUG_KERNEL)
4310 zlog_debug(
4311 "Tx %s family %s IF %s(%u) Neigh %pIA %s %s flags 0x%x state 0x%x %sext_flags 0x%x",
4312 nl_msg_type_to_str(cmd), nl_family_to_str(family),
4313 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
4314 ip, link_ip ? "Link " : "MAC ", buf2, flags, state,
4315 ext ? "ext " : "", ext_flags);
4316
4317 return netlink_neigh_update_msg_encode(
4318 ctx, cmd, link_ptr, llalen, ip, true, family, RTN_UNICAST,
4319 flags, state, 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, ext,
4320 ext_flags, buf, buflen, proto);
4321 }
4322
4323 static int netlink_neigh_table_update_ctx(const struct zebra_dplane_ctx *ctx,
4324 void *data, size_t datalen)
4325 {
4326 struct {
4327 struct nlmsghdr n;
4328 struct ndtmsg ndtm;
4329 char buf[];
4330 } *req = data;
4331 struct rtattr *nest;
4332 uint8_t family;
4333 ifindex_t idx;
4334 uint32_t val;
4335
4336 if (datalen < sizeof(*req))
4337 return 0;
4338 memset(req, 0, sizeof(*req));
4339 family = dplane_ctx_neightable_get_family(ctx);
4340 idx = dplane_ctx_get_ifindex(ctx);
4341
4342 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg));
4343 req->n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE;
4344 req->n.nlmsg_type = RTM_SETNEIGHTBL;
4345 req->ndtm.ndtm_family = family;
4346
4347 nl_attr_put(&req->n, datalen, NDTA_NAME,
4348 family == AF_INET ? "arp_cache" : "ndisc_cache", 10);
4349 nest = nl_attr_nest(&req->n, datalen, NDTA_PARMS);
4350 if (nest == NULL)
4351 return 0;
4352 if (!nl_attr_put(&req->n, datalen, NDTPA_IFINDEX, &idx, sizeof(idx)))
4353 return 0;
4354 val = dplane_ctx_neightable_get_app_probes(ctx);
4355 if (!nl_attr_put(&req->n, datalen, NDTPA_APP_PROBES, &val, sizeof(val)))
4356 return 0;
4357 val = dplane_ctx_neightable_get_mcast_probes(ctx);
4358 if (!nl_attr_put(&req->n, datalen, NDTPA_MCAST_PROBES, &val,
4359 sizeof(val)))
4360 return 0;
4361 val = dplane_ctx_neightable_get_ucast_probes(ctx);
4362 if (!nl_attr_put(&req->n, datalen, NDTPA_UCAST_PROBES, &val,
4363 sizeof(val)))
4364 return 0;
4365 nl_attr_nest_end(&req->n, nest);
4366
4367 return NLMSG_ALIGN(req->n.nlmsg_len);
4368 }
4369
4370 static ssize_t netlink_neigh_msg_encoder(struct zebra_dplane_ctx *ctx,
4371 void *buf, size_t buflen)
4372 {
4373 ssize_t ret;
4374
4375 switch (dplane_ctx_get_op(ctx)) {
4376 case DPLANE_OP_NEIGH_INSTALL:
4377 case DPLANE_OP_NEIGH_UPDATE:
4378 case DPLANE_OP_NEIGH_DISCOVER:
4379 case DPLANE_OP_NEIGH_IP_INSTALL:
4380 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH, buf, buflen);
4381 break;
4382 case DPLANE_OP_NEIGH_DELETE:
4383 case DPLANE_OP_NEIGH_IP_DELETE:
4384 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH, buf, buflen);
4385 break;
4386 case DPLANE_OP_VTEP_ADD:
4387 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH, buf,
4388 buflen);
4389 break;
4390 case DPLANE_OP_VTEP_DELETE:
4391 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH, buf,
4392 buflen);
4393 break;
4394 case DPLANE_OP_NEIGH_TABLE_UPDATE:
4395 ret = netlink_neigh_table_update_ctx(ctx, buf, buflen);
4396 break;
4397 default:
4398 ret = -1;
4399 }
4400
4401 return ret;
4402 }
4403
4404 /*
4405 * Update MAC, using dataplane context object.
4406 */
4407
4408 enum netlink_msg_status netlink_put_mac_update_msg(struct nl_batch *bth,
4409 struct zebra_dplane_ctx *ctx)
4410 {
4411 return netlink_batch_add_msg(bth, ctx, netlink_macfdb_update_ctx,
4412 false);
4413 }
4414
4415 enum netlink_msg_status
4416 netlink_put_neigh_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
4417 {
4418 return netlink_batch_add_msg(bth, ctx, netlink_neigh_msg_encoder,
4419 false);
4420 }
4421
4422 /*
4423 * MPLS label forwarding table change via netlink interface, using dataplane
4424 * context information.
4425 */
4426 ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
4427 void *buf, size_t buflen)
4428 {
4429 mpls_lse_t lse;
4430 const struct nhlfe_list_head *head;
4431 const struct zebra_nhlfe *nhlfe;
4432 struct nexthop *nexthop = NULL;
4433 unsigned int nexthop_num;
4434 const char *routedesc;
4435 int route_type;
4436 struct prefix p = {0};
4437 struct nlsock *nl =
4438 kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
4439
4440 struct {
4441 struct nlmsghdr n;
4442 struct rtmsg r;
4443 char buf[0];
4444 } *req = buf;
4445
4446 if (buflen < sizeof(*req))
4447 return 0;
4448
4449 memset(req, 0, sizeof(*req));
4450
4451 /*
4452 * Count # nexthops so we can decide whether to use singlepath
4453 * or multipath case.
4454 */
4455 nexthop_num = 0;
4456 head = dplane_ctx_get_nhlfe_list(ctx);
4457 frr_each(nhlfe_list_const, head, nhlfe) {
4458 nexthop = nhlfe->nexthop;
4459 if (!nexthop)
4460 continue;
4461 if (cmd == RTM_NEWROUTE) {
4462 /* Count all selected NHLFEs */
4463 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4464 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
4465 nexthop_num++;
4466 } else { /* DEL */
4467 /* Count all installed NHLFEs */
4468 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
4469 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
4470 nexthop_num++;
4471 }
4472 }
4473
4474 if ((nexthop_num == 0) ||
4475 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
4476 return 0;
4477
4478 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
4479 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
4480 req->n.nlmsg_type = cmd;
4481 req->n.nlmsg_pid = nl->snl.nl_pid;
4482
4483 req->r.rtm_family = AF_MPLS;
4484 req->r.rtm_table = RT_TABLE_MAIN;
4485 req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
4486 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
4487 req->r.rtm_type = RTN_UNICAST;
4488
4489 if (cmd == RTM_NEWROUTE) {
4490 /* We do a replace to handle update. */
4491 req->n.nlmsg_flags |= NLM_F_REPLACE;
4492
4493 /* set the protocol value if installing */
4494 route_type = re_type_from_lsp_type(
4495 dplane_ctx_get_best_nhlfe(ctx)->type);
4496 req->r.rtm_protocol = zebra2proto(route_type);
4497 }
4498
4499 /* Fill destination */
4500 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
4501 if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t)))
4502 return 0;
4503
4504 /* Fill nexthops (paths) based on single-path or multipath. The paths
4505 * chosen depend on the operation.
4506 */
4507 if (nexthop_num == 1) {
4508 routedesc = "single-path";
4509 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4510 routedesc);
4511
4512 nexthop_num = 0;
4513 frr_each(nhlfe_list_const, head, nhlfe) {
4514 nexthop = nhlfe->nexthop;
4515 if (!nexthop)
4516 continue;
4517
4518 if ((cmd == RTM_NEWROUTE
4519 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4520 && CHECK_FLAG(nexthop->flags,
4521 NEXTHOP_FLAG_ACTIVE)))
4522 || (cmd == RTM_DELROUTE
4523 && (CHECK_FLAG(nhlfe->flags,
4524 NHLFE_FLAG_INSTALLED)
4525 && CHECK_FLAG(nexthop->flags,
4526 NEXTHOP_FLAG_FIB)))) {
4527 /* Add the gateway */
4528 if (!_netlink_mpls_build_singlepath(
4529 &p, routedesc, nhlfe, &req->n,
4530 &req->r, buflen, cmd))
4531 return false;
4532
4533 nexthop_num++;
4534 break;
4535 }
4536 }
4537 } else { /* Multipath case */
4538 struct rtattr *nest;
4539 const union g_addr *src1 = NULL;
4540
4541 nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH);
4542 if (!nest)
4543 return 0;
4544
4545 routedesc = "multipath";
4546 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4547 routedesc);
4548
4549 nexthop_num = 0;
4550 frr_each(nhlfe_list_const, head, nhlfe) {
4551 nexthop = nhlfe->nexthop;
4552 if (!nexthop)
4553 continue;
4554
4555 if ((cmd == RTM_NEWROUTE
4556 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4557 && CHECK_FLAG(nexthop->flags,
4558 NEXTHOP_FLAG_ACTIVE)))
4559 || (cmd == RTM_DELROUTE
4560 && (CHECK_FLAG(nhlfe->flags,
4561 NHLFE_FLAG_INSTALLED)
4562 && CHECK_FLAG(nexthop->flags,
4563 NEXTHOP_FLAG_FIB)))) {
4564 nexthop_num++;
4565
4566 /* Build the multipath */
4567 if (!_netlink_mpls_build_multipath(
4568 &p, routedesc, nhlfe, &req->n,
4569 buflen, &req->r, &src1))
4570 return 0;
4571 }
4572 }
4573
4574 /* Add the multipath */
4575 nl_attr_nest_end(&req->n, nest);
4576 }
4577
4578 return NLMSG_ALIGN(req->n.nlmsg_len);
4579 }
4580
4581 /****************************************************************************
4582 * This code was developed in a branch that didn't have dplane APIs for
4583 * MAC updates. Hence the use of the legacy style. It will be moved to
4584 * the new dplane style pre-merge to master. XXX
4585 */
4586 static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip)
4587 {
4588 struct {
4589 struct nlmsghdr n;
4590 struct nhmsg nhm;
4591 char buf[256];
4592 } req;
4593 int cmd = RTM_NEWNEXTHOP;
4594 struct zebra_vrf *zvrf;
4595 struct zebra_ns *zns;
4596
4597 zvrf = zebra_vrf_get_evpn();
4598 zns = zvrf->zns;
4599
4600 memset(&req, 0, sizeof(req));
4601
4602 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4603 req.n.nlmsg_flags = NLM_F_REQUEST;
4604 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4605 req.n.nlmsg_type = cmd;
4606 req.nhm.nh_family = AF_INET;
4607
4608 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4609 return -1;
4610 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4611 return -1;
4612 if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY,
4613 &vtep_ip, IPV4_MAX_BYTELEN))
4614 return -1;
4615
4616 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4617 zlog_debug("Tx %s fdb-nh 0x%x %pI4",
4618 nl_msg_type_to_str(cmd), nh_id, &vtep_ip);
4619 }
4620
4621 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4622 false);
4623 }
4624
4625 static int netlink_fdb_nh_del(uint32_t nh_id)
4626 {
4627 struct {
4628 struct nlmsghdr n;
4629 struct nhmsg nhm;
4630 char buf[256];
4631 } req;
4632 int cmd = RTM_DELNEXTHOP;
4633 struct zebra_vrf *zvrf;
4634 struct zebra_ns *zns;
4635
4636 zvrf = zebra_vrf_get_evpn();
4637 zns = zvrf->zns;
4638
4639 memset(&req, 0, sizeof(req));
4640
4641 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4642 req.n.nlmsg_flags = NLM_F_REQUEST;
4643 req.n.nlmsg_type = cmd;
4644 req.nhm.nh_family = AF_UNSPEC;
4645
4646 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4647 return -1;
4648
4649 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4650 zlog_debug("Tx %s fdb-nh 0x%x",
4651 nl_msg_type_to_str(cmd), nh_id);
4652 }
4653
4654 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4655 false);
4656 }
4657
4658 static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt,
4659 struct nh_grp *nh_ids)
4660 {
4661 struct {
4662 struct nlmsghdr n;
4663 struct nhmsg nhm;
4664 char buf[256];
4665 } req;
4666 int cmd = RTM_NEWNEXTHOP;
4667 struct zebra_vrf *zvrf;
4668 struct zebra_ns *zns;
4669 struct nexthop_grp grp[nh_cnt];
4670 uint32_t i;
4671
4672 zvrf = zebra_vrf_get_evpn();
4673 zns = zvrf->zns;
4674
4675 memset(&req, 0, sizeof(req));
4676
4677 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4678 req.n.nlmsg_flags = NLM_F_REQUEST;
4679 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4680 req.n.nlmsg_type = cmd;
4681 req.nhm.nh_family = AF_UNSPEC;
4682
4683 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id))
4684 return -1;
4685 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4686 return -1;
4687 memset(&grp, 0, sizeof(grp));
4688 for (i = 0; i < nh_cnt; ++i) {
4689 grp[i].id = nh_ids[i].id;
4690 grp[i].weight = nh_ids[i].weight;
4691 }
4692 if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP,
4693 grp, nh_cnt * sizeof(struct nexthop_grp)))
4694 return -1;
4695
4696
4697 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4698 char vtep_str[ES_VTEP_LIST_STR_SZ];
4699 char nh_buf[16];
4700
4701 vtep_str[0] = '\0';
4702 for (i = 0; i < nh_cnt; ++i) {
4703 snprintf(nh_buf, sizeof(nh_buf), "%u ",
4704 grp[i].id);
4705 strlcat(vtep_str, nh_buf, sizeof(vtep_str));
4706 }
4707
4708 zlog_debug("Tx %s fdb-nhg 0x%x %s",
4709 nl_msg_type_to_str(cmd), nhg_id, vtep_str);
4710 }
4711
4712 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4713 false);
4714 }
4715
/*
 * Remove an FDB nexthop group from the kernel. The request is the same as
 * for a single nexthop, so this hands the id to netlink_fdb_nh_del() and
 * returns its result.
 */
static int netlink_fdb_nhg_del(uint32_t nhg_id)
{
	int rc = netlink_fdb_nh_del(nhg_id);

	return rc;
}
4720
/*
 * Install or update the FDB nexthop nh_id with gateway vtep_ip.
 * Returns the result of netlink_fdb_nh_update().
 */
int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
{
	int rc = netlink_fdb_nh_update(nh_id, vtep_ip);

	return rc;
}
4725
/*
 * Remove the FDB nexthop nh_id.
 * Returns the result of netlink_fdb_nh_del().
 */
int kernel_del_mac_nh(uint32_t nh_id)
{
	int rc = netlink_fdb_nh_del(nh_id);

	return rc;
}
4730
/*
 * Install or update the FDB nexthop group nhg_id with the nh_cnt members
 * listed in nh_ids.
 * Returns the result of netlink_fdb_nhg_update().
 */
int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
		       struct nh_grp *nh_ids)
{
	int rc = netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids);

	return rc;
}
4736
/*
 * Remove the FDB nexthop group nhg_id.
 * Returns the result of netlink_fdb_nhg_del().
 */
int kernel_del_mac_nhg(uint32_t nhg_id)
{
	int rc = netlink_fdb_nhg_del(nhg_id);

	return rc;
}
4741
4742 #endif /* HAVE_NETLINK */