]> git.proxmox.com Git - mirror_frr.git/blame - zebra/rt_netlink.c
zebra: Add netlink_interface_addr tracepoint
[mirror_frr.git] / zebra / rt_netlink.c
CommitLineData
718e3744 1/* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21#include <zebra.h>
ddfeb486
DL
22
23#ifdef HAVE_NETLINK
24
8689b25a
HS
25/* The following definition is to workaround an issue in the Linux kernel
26 * header files with redefinition of 'struct in6_addr' in both
27 * netinet/in.h and linux/in6.h.
28 * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html
29 */
30#define _LINUX_IN6_H
31
8ccc7e80 32#include <net/if_arp.h>
ba777396
RW
33#include <linux/lwtunnel.h>
34#include <linux/mpls_iptunnel.h>
8689b25a
HS
35#include <linux/seg6_iptunnel.h>
36#include <linux/seg6_local.h>
ba777396
RW
37#include <linux/neighbour.h>
38#include <linux/rtnetlink.h>
d9f5b2f5 39#include <linux/nexthop.h>
718e3744 40
41/* Hack for GNU libc version 2. */
42#ifndef MSG_TRUNC
43#define MSG_TRUNC 0x20
44#endif /* MSG_TRUNC */
45
46#include "linklist.h"
47#include "if.h"
48#include "log.h"
49#include "prefix.h"
8689b25a
HS
50#include "plist.h"
51#include "plist_int.h"
718e3744 52#include "connected.h"
53#include "table.h"
26e2ae36 54#include "memory.h"
718e3744 55#include "rib.h"
e04ab74d 56#include "thread.h"
edd7c245 57#include "privs.h"
fb018d25 58#include "nexthop.h"
78104b9b 59#include "vrf.h"
5e6a74d8 60#include "vty.h"
40c7bdb0 61#include "mpls.h"
13d60d35 62#include "vxlan.h"
8d03bc50 63#include "printfrr.h"
718e3744 64
bf094f69 65#include "zebra/zapi_msg.h"
fe18ee2d 66#include "zebra/zebra_ns.h"
7c551956 67#include "zebra/zebra_vrf.h"
6621ca86 68#include "zebra/rt.h"
718e3744 69#include "zebra/redistribute.h"
70#include "zebra/interface.h"
71#include "zebra/debug.h"
12f6fb97 72#include "zebra/rtadv.h"
567b877d 73#include "zebra/zebra_ptm.h"
40c7bdb0 74#include "zebra/zebra_mpls.h"
1fdc9eae 75#include "zebra/kernel_netlink.h"
76#include "zebra/rt_netlink.h"
d9f5b2f5 77#include "zebra/zebra_nhg.h"
e3be0432 78#include "zebra/zebra_mroute.h"
2232a77c 79#include "zebra/zebra_vxlan.h"
364fed6b 80#include "zebra/zebra_errors.h"
506efd37 81#include "zebra/zebra_evpn_mh.h"
1d80c209 82#include "zebra/zebra_trace.h"
e3be0432 83
40c7bdb0 84#ifndef AF_MPLS
85#define AF_MPLS 28
86#endif
87
d87ed8d7
AK
88/* Re-defining as I am unable to include <linux/if_bridge.h> which has the
89 * UAPI for MAC sync. */
90#ifndef _UAPI_LINUX_IF_BRIDGE_H
4bcdb608 91#define BR_SPH_LIST_SIZE 10
d87ed8d7
AK
92#endif
93
2232a77c 94static vlanid_t filter_vlan = 0;
95
7c99d51b
MS
96/* We capture whether the current kernel supports nexthop ids; by
97 * default, we'll use them if possible. There's also a configuration
98 * available to _disable_ use of kernel nexthops.
99 */
fec211ad 100static bool supports_nh;
81505946 101
d62a17ae 102struct gw_family_t {
d7c0a89a
QY
103 uint16_t filler;
104 uint16_t family;
d62a17ae 105 union g_addr gate;
40c7bdb0 106};
107
2b64873d
DL
108static const char ipv4_ll_buf[16] = "169.254.0.1";
109static struct in_addr ipv4_ll;
8755598a 110
002e5c43
SW
111/* Is this a ipv4 over ipv6 route? */
112static bool is_route_v4_over_v6(unsigned char rtm_family,
113 enum nexthop_types_t nexthop_type)
114{
115 if (rtm_family == AF_INET
116 && (nexthop_type == NEXTHOP_TYPE_IPV6
117 || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX))
118 return true;
119
120 return false;
121}
122
7c99d51b
MS
123/* Helper to control use of kernel-level nexthop ids */
124static bool kernel_nexthops_supported(void)
125{
d982012a
SW
126 return (supports_nh && !vrf_is_backend_netns()
127 && zebra_nhg_kernel_nexthops_enabled());
7c99d51b
MS
128}
129
6c67f41f
SW
/*
 * Operators may want only the nexthop groups created by protocols to be
 * used, not the ones zebra creates implicitly. This reports that
 * setting as held by zebra_nhg.
 */
static bool proto_nexthops_only(void)
{
	bool proto_only = zebra_nhg_proto_nexthops_only();

	return proto_only;
}
138
139/* Is this a proto created NHG? */
140static bool is_proto_nhg(uint32_t id, int type)
141{
142 /* If type is available, use it as the source of truth */
143 if (type) {
144 if (type != ZEBRA_ROUTE_NHG)
145 return true;
146 return false;
147 }
148
54c89c93 149 if (id >= ZEBRA_NHG_PROTO_LOWER)
6c67f41f
SW
150 return true;
151
152 return false;
153}
154
8755598a
DS
155/*
156 * The ipv4_ll data structure is used for all 5549
157 * additions to the kernel. Let's figure out the
158 * correct value one time instead for every
159 * install/remove of a 5549 type route
160 */
d62a17ae 161void rt_netlink_init(void)
8755598a 162{
d62a17ae 163 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
8755598a
DS
164}
165
931fa60c
MS
166/*
167 * Mapping from dataplane neighbor flags to netlink flags
168 */
169static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
170{
171 uint8_t flags = 0;
172
173 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
174 flags |= NTF_EXT_LEARNED;
175 if (dplane_flags & DPLANE_NTF_ROUTER)
176 flags |= NTF_ROUTER;
d68e74b4
JU
177 if (dplane_flags & DPLANE_NTF_USE)
178 flags |= NTF_USE;
931fa60c
MS
179
180 return flags;
181}
182
183/*
184 * Mapping from dataplane neighbor state to netlink state
185 */
186static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
187{
188 uint16_t state = 0;
189
190 if (dplane_state & DPLANE_NUD_REACHABLE)
191 state |= NUD_REACHABLE;
192 if (dplane_state & DPLANE_NUD_STALE)
193 state |= NUD_STALE;
194 if (dplane_state & DPLANE_NUD_NOARP)
195 state |= NUD_NOARP;
196 if (dplane_state & DPLANE_NUD_PROBE)
197 state |= NUD_PROBE;
d68e74b4
JU
198 if (dplane_state & DPLANE_NUD_INCOMPLETE)
199 state |= NUD_INCOMPLETE;
0a27a2fe
PG
200 if (dplane_state & DPLANE_NUD_PERMANENT)
201 state |= NUD_PERMANENT;
202 if (dplane_state & DPLANE_NUD_FAILED)
203 state |= NUD_FAILED;
931fa60c
MS
204
205 return state;
206}
207
208
6a6d11a3 209static inline bool is_selfroute(int proto)
23b1f334 210{
d62a17ae 211 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
d4d71f11 212 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
d62a17ae 213 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
214 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
915902cb 215 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
0761368a 216 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
31f937fb
SM
217 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)
218 || (proto == RTPROT_SRTE)) {
6a6d11a3 219 return true;
d62a17ae 220 }
221
6a6d11a3 222 return false;
23b1f334
DD
223}
224
915902cb 225static inline int zebra2proto(int proto)
23b1f334 226{
d62a17ae 227 switch (proto) {
228 case ZEBRA_ROUTE_BABEL:
229 proto = RTPROT_BABEL;
230 break;
231 case ZEBRA_ROUTE_BGP:
232 proto = RTPROT_BGP;
233 break;
234 case ZEBRA_ROUTE_OSPF:
235 case ZEBRA_ROUTE_OSPF6:
236 proto = RTPROT_OSPF;
237 break;
238 case ZEBRA_ROUTE_STATIC:
d4d71f11 239 proto = RTPROT_ZSTATIC;
d62a17ae 240 break;
241 case ZEBRA_ROUTE_ISIS:
242 proto = RTPROT_ISIS;
243 break;
244 case ZEBRA_ROUTE_RIP:
245 proto = RTPROT_RIP;
246 break;
247 case ZEBRA_ROUTE_RIPNG:
248 proto = RTPROT_RIPNG;
249 break;
250 case ZEBRA_ROUTE_NHRP:
251 proto = RTPROT_NHRP;
252 break;
253 case ZEBRA_ROUTE_EIGRP:
254 proto = RTPROT_EIGRP;
255 break;
256 case ZEBRA_ROUTE_LDP:
257 proto = RTPROT_LDP;
258 break;
8a71d93d
DS
259 case ZEBRA_ROUTE_SHARP:
260 proto = RTPROT_SHARP;
261 break;
0761368a
DS
262 case ZEBRA_ROUTE_PBR:
263 proto = RTPROT_PBR;
264 break;
da82f6b4
CF
265 case ZEBRA_ROUTE_OPENFABRIC:
266 proto = RTPROT_OPENFABRIC;
267 break;
31f937fb
SM
268 case ZEBRA_ROUTE_SRTE:
269 proto = RTPROT_SRTE;
270 break;
a56ec5c0 271 case ZEBRA_ROUTE_TABLE:
38e40db1 272 case ZEBRA_ROUTE_NHG:
a56ec5c0
DS
273 proto = RTPROT_ZEBRA;
274 break;
911d4d48
DE
275 case ZEBRA_ROUTE_CONNECT:
276 case ZEBRA_ROUTE_KERNEL:
277 proto = RTPROT_KERNEL;
278 break;
d62a17ae 279 default:
0761368a
DS
280 /*
281 * When a user adds a new protocol this will show up
282 * to let them know to do something about it. This
283 * is intentionally a warn because we should see
284 * this as part of development of a new protocol
285 */
9df414fe
QY
286 zlog_debug(
287 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
15569c58 288 __func__, proto);
d62a17ae 289 proto = RTPROT_ZEBRA;
290 break;
291 }
292
293 return proto;
23b1f334
DD
294}
295
38e40db1 296static inline int proto2zebra(int proto, int family, bool is_nexthop)
915902cb
DS
297{
298 switch (proto) {
299 case RTPROT_BABEL:
300 proto = ZEBRA_ROUTE_BABEL;
301 break;
302 case RTPROT_BGP:
303 proto = ZEBRA_ROUTE_BGP;
304 break;
305 case RTPROT_OSPF:
d6816f68
DS
306 proto = (family == AF_INET) ? ZEBRA_ROUTE_OSPF
307 : ZEBRA_ROUTE_OSPF6;
915902cb
DS
308 break;
309 case RTPROT_ISIS:
310 proto = ZEBRA_ROUTE_ISIS;
311 break;
312 case RTPROT_RIP:
313 proto = ZEBRA_ROUTE_RIP;
314 break;
315 case RTPROT_RIPNG:
316 proto = ZEBRA_ROUTE_RIPNG;
317 break;
318 case RTPROT_NHRP:
319 proto = ZEBRA_ROUTE_NHRP;
320 break;
321 case RTPROT_EIGRP:
322 proto = ZEBRA_ROUTE_EIGRP;
323 break;
324 case RTPROT_LDP:
325 proto = ZEBRA_ROUTE_LDP;
326 break;
327 case RTPROT_STATIC:
d4d71f11 328 case RTPROT_ZSTATIC:
915902cb
DS
329 proto = ZEBRA_ROUTE_STATIC;
330 break;
0761368a
DS
331 case RTPROT_SHARP:
332 proto = ZEBRA_ROUTE_SHARP;
333 break;
334 case RTPROT_PBR:
335 proto = ZEBRA_ROUTE_PBR;
336 break;
da82f6b4
CF
337 case RTPROT_OPENFABRIC:
338 proto = ZEBRA_ROUTE_OPENFABRIC;
339 break;
31f937fb
SM
340 case RTPROT_SRTE:
341 proto = ZEBRA_ROUTE_SRTE;
342 break;
38e40db1
SW
343 case RTPROT_ZEBRA:
344 if (is_nexthop) {
345 proto = ZEBRA_ROUTE_NHG;
346 break;
347 }
348 /* Intentional fall thru */
915902cb 349 default:
0761368a
DS
350 /*
351 * When a user adds a new protocol this will show up
352 * to let them know to do something about it. This
353 * is intentionally a warn because we should see
354 * this as part of development of a new protocol
355 */
9df414fe
QY
356 zlog_debug(
357 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
15569c58 358 __func__, proto);
915902cb
DS
359 proto = ZEBRA_ROUTE_KERNEL;
360 break;
361 }
362 return proto;
363}
364
12f6fb97
DS
365/*
366Pending: create an efficient table_id (in a tree/hash) based lookup)
367 */
9d866c07 368vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
12f6fb97 369{
d62a17ae 370 struct vrf *vrf;
371 struct zebra_vrf *zvrf;
12f6fb97 372
a2addae8 373 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
78dd30b2
PG
374 zvrf = vrf->info;
375 if (zvrf == NULL)
d62a17ae 376 continue;
78dd30b2
PG
377 /* case vrf with netns : match the netnsid */
378 if (vrf_is_backend_netns()) {
379 if (ns_id == zvrf_id(zvrf))
380 return zvrf_id(zvrf);
381 } else {
382 /* VRF is VRF_BACKEND_VRF_LITE */
383 if (zvrf->table_id != table_id)
384 continue;
385 return zvrf_id(zvrf);
386 }
d62a17ae 387 }
12f6fb97 388
d62a17ae 389 return VRF_DEFAULT;
12f6fb97
DS
390}
391
87da6a60
SW
392/**
393 * @parse_encap_mpls() - Parses encapsulated mpls attributes
394 * @tb: Pointer to rtattr to look for nested items in.
395 * @labels: Pointer to store labels in.
396 *
397 * Return: Number of mpls labels found.
398 */
399static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
400{
401 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
402 mpls_lse_t *lses = NULL;
403 int num_labels = 0;
404 uint32_t ttl = 0;
405 uint32_t bos = 0;
406 uint32_t exp = 0;
407 mpls_label_t label = 0;
408
409 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
410 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
411 while (!bos && num_labels < MPLS_MAX_LABELS) {
412 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
413 labels[num_labels++] = label;
414 }
415
416 return num_labels;
417}
418
d49e6c4a
HS
419static enum seg6local_action_t
420parse_encap_seg6local(struct rtattr *tb,
421 struct seg6local_context *ctx)
422{
1bda3e62 423 struct rtattr *tb_encap[256] = {};
d49e6c4a
HS
424 enum seg6local_action_t act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
425
426 netlink_parse_rtattr_nested(tb_encap, 256, tb);
427
428 if (tb_encap[SEG6_LOCAL_ACTION])
429 act = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_ACTION]);
430
431 if (tb_encap[SEG6_LOCAL_NH4])
432 ctx->nh4 = *(struct in_addr *)RTA_DATA(
433 tb_encap[SEG6_LOCAL_NH4]);
434
435 if (tb_encap[SEG6_LOCAL_NH6])
436 ctx->nh6 = *(struct in6_addr *)RTA_DATA(
437 tb_encap[SEG6_LOCAL_NH6]);
438
439 if (tb_encap[SEG6_LOCAL_TABLE])
440 ctx->table = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_TABLE]);
441
7eab60a7
RS
442 if (tb_encap[SEG6_LOCAL_VRFTABLE])
443 ctx->table =
444 *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_VRFTABLE]);
445
d49e6c4a
HS
446 return act;
447}
448
f16de90b
HS
449static int parse_encap_seg6(struct rtattr *tb, struct in6_addr *segs)
450{
1bda3e62 451 struct rtattr *tb_encap[256] = {};
f16de90b
HS
452 struct seg6_iptunnel_encap *ipt = NULL;
453 struct in6_addr *segments = NULL;
454
455 netlink_parse_rtattr_nested(tb_encap, 256, tb);
456
457 /*
458 * TODO: It's not support multiple SID list.
459 */
460 if (tb_encap[SEG6_IPTUNNEL_SRH]) {
461 ipt = (struct seg6_iptunnel_encap *)
462 RTA_DATA(tb_encap[SEG6_IPTUNNEL_SRH]);
463 segments = ipt->srh[0].segments;
464 *segs = segments[0];
465 return 1;
466 }
467
468 return 0;
469}
470
471
77a44d94
SW
472static struct nexthop
473parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
474 enum blackhole_type bh_type, int index, void *prefsrc,
20822f9d 475 void *gate, afi_t afi, vrf_id_t vrf_id)
77a44d94
SW
476{
477 struct interface *ifp = NULL;
478 struct nexthop nh = {0};
479 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
480 int num_labels = 0;
b9596f13 481 enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
1bda3e62
HS
482 struct seg6local_context seg6l_ctx = {};
483 struct in6_addr seg6_segs = {};
f16de90b 484 int num_segs = 0;
77a44d94 485
20822f9d 486 vrf_id_t nh_vrf_id = vrf_id;
77a44d94
SW
487 size_t sz = (afi == AFI_IP) ? 4 : 16;
488
489 if (bh_type == BLACKHOLE_UNSPEC) {
490 if (index && !gate)
491 nh.type = NEXTHOP_TYPE_IFINDEX;
492 else if (index && gate)
493 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
494 : NEXTHOP_TYPE_IPV6_IFINDEX;
495 else if (!index && gate)
496 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
497 : NEXTHOP_TYPE_IPV6;
498 else {
499 nh.type = NEXTHOP_TYPE_BLACKHOLE;
500 nh.bh_type = bh_type;
501 }
502 } else {
503 nh.type = NEXTHOP_TYPE_BLACKHOLE;
504 nh.bh_type = bh_type;
505 }
506 nh.ifindex = index;
507 if (prefsrc)
508 memcpy(&nh.src, prefsrc, sz);
509 if (gate)
510 memcpy(&nh.gate, gate, sz);
511
512 if (index) {
513 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
514 if (ifp)
096f7609 515 nh_vrf_id = ifp->vrf->vrf_id;
77a44d94
SW
516 }
517 nh.vrf_id = nh_vrf_id;
518
519 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
520 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
521 == LWTUNNEL_ENCAP_MPLS) {
522 num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
523 }
d49e6c4a
HS
524 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
525 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
526 == LWTUNNEL_ENCAP_SEG6_LOCAL) {
527 seg6l_act = parse_encap_seg6local(tb[RTA_ENCAP], &seg6l_ctx);
528 }
f16de90b
HS
529 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
530 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
531 == LWTUNNEL_ENCAP_SEG6) {
532 num_segs = parse_encap_seg6(tb[RTA_ENCAP], &seg6_segs);
533 }
77a44d94
SW
534
535 if (rtm->rtm_flags & RTNH_F_ONLINK)
536 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
537
538 if (num_labels)
539 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
540
d49e6c4a 541 if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
eab0f8f0 542 nexthop_add_srv6_seg6local(&nh, seg6l_act, &seg6l_ctx);
d49e6c4a 543
f16de90b 544 if (num_segs)
eab0f8f0 545 nexthop_add_srv6_seg6(&nh, &seg6_segs);
f16de90b 546
77a44d94
SW
547 return nh;
548}
549
20822f9d 550static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
0eb97b86 551 struct nexthop_group *ng,
20822f9d
SW
552 struct rtmsg *rtm,
553 struct rtnexthop *rtnh,
554 struct rtattr **tb,
555 void *prefsrc, vrf_id_t vrf_id)
556{
557 void *gate = NULL;
558 struct interface *ifp = NULL;
559 int index = 0;
560 /* MPLS labels */
561 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
562 int num_labels = 0;
b9596f13 563 enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
1bda3e62
HS
564 struct seg6local_context seg6l_ctx = {};
565 struct in6_addr seg6_segs = {};
f16de90b 566 int num_segs = 0;
20822f9d
SW
567 struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
568
569 int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
570 vrf_id_t nh_vrf_id = vrf_id;
571
20822f9d
SW
572 for (;;) {
573 struct nexthop *nh = NULL;
574
575 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
576 break;
577
578 index = rtnh->rtnh_ifindex;
579 if (index) {
580 /*
581 * Yes we are looking this up
582 * for every nexthop and just
583 * using the last one looked
584 * up right now
585 */
586 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
587 index);
588 if (ifp)
096f7609 589 nh_vrf_id = ifp->vrf->vrf_id;
20822f9d
SW
590 else {
591 flog_warn(
592 EC_ZEBRA_UNKNOWN_INTERFACE,
593 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
15569c58 594 __func__, index);
20822f9d
SW
595 nh_vrf_id = VRF_DEFAULT;
596 }
597 } else
598 nh_vrf_id = vrf_id;
599
600 if (rtnh->rtnh_len > sizeof(*rtnh)) {
20822f9d
SW
601 netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
602 rtnh->rtnh_len - sizeof(*rtnh));
603 if (rtnh_tb[RTA_GATEWAY])
604 gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
605 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
606 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
607 == LWTUNNEL_ENCAP_MPLS) {
608 num_labels = parse_encap_mpls(
609 rtnh_tb[RTA_ENCAP], labels);
610 }
d49e6c4a
HS
611 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
612 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
613 == LWTUNNEL_ENCAP_SEG6_LOCAL) {
614 seg6l_act = parse_encap_seg6local(
615 rtnh_tb[RTA_ENCAP], &seg6l_ctx);
616 }
f16de90b
HS
617 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
618 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
619 == LWTUNNEL_ENCAP_SEG6) {
620 num_segs = parse_encap_seg6(rtnh_tb[RTA_ENCAP],
621 &seg6_segs);
622 }
20822f9d
SW
623 }
624
f3354e16
SW
625 if (gate && rtm->rtm_family == AF_INET) {
626 if (index)
0eb97b86
MS
627 nh = nexthop_from_ipv4_ifindex(
628 gate, prefsrc, index, nh_vrf_id);
f3354e16 629 else
0eb97b86
MS
630 nh = nexthop_from_ipv4(gate, prefsrc,
631 nh_vrf_id);
f3354e16
SW
632 } else if (gate && rtm->rtm_family == AF_INET6) {
633 if (index)
0eb97b86
MS
634 nh = nexthop_from_ipv6_ifindex(
635 gate, index, nh_vrf_id);
f3354e16 636 else
0eb97b86 637 nh = nexthop_from_ipv6(gate, nh_vrf_id);
20822f9d 638 } else
0eb97b86 639 nh = nexthop_from_ifindex(index, nh_vrf_id);
20822f9d
SW
640
641 if (nh) {
df7fb580
DS
642 nh->weight = rtnh->rtnh_hops + 1;
643
20822f9d
SW
644 if (num_labels)
645 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
646 num_labels, labels);
647
d49e6c4a 648 if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
eab0f8f0
HS
649 nexthop_add_srv6_seg6local(nh, seg6l_act,
650 &seg6l_ctx);
d49e6c4a 651
f16de90b 652 if (num_segs)
eab0f8f0 653 nexthop_add_srv6_seg6(nh, &seg6_segs);
f16de90b 654
20822f9d
SW
655 if (rtnh->rtnh_flags & RTNH_F_ONLINK)
656 SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);
0eb97b86
MS
657
658 /* Add to temporary list */
659 nexthop_group_add_sorted(ng, nh);
20822f9d
SW
660 }
661
662 if (rtnh->rtnh_len == 0)
663 break;
664
665 len -= NLMSG_ALIGN(rtnh->rtnh_len);
666 rtnh = RTNH_NEXT(rtnh);
667 }
668
0eb97b86 669 uint8_t nhop_num = nexthop_group_nexthop_num(ng);
20822f9d
SW
670
671 return nhop_num;
672}
673
718e3744 674/* Looking up routing table by netlink interface. */
2414abd3 675static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
d62a17ae 676 int startup)
718e3744 677{
d62a17ae 678 int len;
679 struct rtmsg *rtm;
680 struct rtattr *tb[RTA_MAX + 1];
acde7f6b 681 uint32_t flags = 0;
d62a17ae 682 struct prefix p;
792fa92e 683 struct prefix_ipv6 src_p = {};
78dd30b2 684 vrf_id_t vrf_id;
6a6d11a3 685 bool selfroute;
d62a17ae 686
687 char anyaddr[16] = {0};
688
915902cb 689 int proto = ZEBRA_ROUTE_KERNEL;
d62a17ae 690 int index = 0;
691 int table;
692 int metric = 0;
d7c0a89a 693 uint32_t mtu = 0;
25715c7e 694 uint8_t distance = 0;
4e40b6d6 695 route_tag_t tag = 0;
fcc89a9c 696 uint32_t nhe_id = 0;
d62a17ae 697
698 void *dest = NULL;
699 void *gate = NULL;
700 void *prefsrc = NULL; /* IPv4 preferred source host address */
701 void *src = NULL; /* IPv6 srcdest source prefix */
e655a03c 702 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
d62a17ae 703
704 rtm = NLMSG_DATA(h);
705
706 if (startup && h->nlmsg_type != RTM_NEWROUTE)
707 return 0;
e655a03c
DL
708 switch (rtm->rtm_type) {
709 case RTN_UNICAST:
710 break;
711 case RTN_BLACKHOLE:
712 bh_type = BLACKHOLE_NULL;
713 break;
714 case RTN_UNREACHABLE:
715 bh_type = BLACKHOLE_REJECT;
716 break;
717 case RTN_PROHIBIT:
718 bh_type = BLACKHOLE_ADMINPROHIB;
719 break;
720 default:
8c8f250b
DS
721 if (IS_ZEBRA_DEBUG_KERNEL)
722 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
723 nl_rttype_to_str(rtm->rtm_type),
724 rtm->rtm_type);
d62a17ae 725 return 0;
e655a03c 726 }
d62a17ae 727
728 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618 729 if (len < 0) {
15569c58
DA
730 zlog_err(
731 "%s: Message received from netlink is of a broken size %d %zu",
732 __func__, h->nlmsg_len,
733 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 734 return -1;
9bdf8618 735 }
d62a17ae 736
d62a17ae 737 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
738
739 if (rtm->rtm_flags & RTM_F_CLONED)
740 return 0;
741 if (rtm->rtm_protocol == RTPROT_REDIRECT)
742 return 0;
743 if (rtm->rtm_protocol == RTPROT_KERNEL)
744 return 0;
745
6a6d11a3
NN
746 selfroute = is_selfroute(rtm->rtm_protocol);
747
e4876266
DS
748 if (!startup && selfroute
749 && h->nlmsg_type == RTM_NEWROUTE
750 && !zrouter.asic_offloaded) {
6ab5222f
DS
751 if (IS_ZEBRA_DEBUG_KERNEL)
752 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
753 rtm->rtm_protocol);
d62a17ae 754 return 0;
6ab5222f 755 }
d62a17ae 756
757 /* We don't care about change notifications for the MPLS table. */
758 /* TODO: Revisit this. */
759 if (rtm->rtm_family == AF_MPLS)
760 return 0;
761
762 /* Table corresponding to route. */
763 if (tb[RTA_TABLE])
764 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
765 else
766 table = rtm->rtm_table;
767
768 /* Map to VRF */
78dd30b2 769 vrf_id = vrf_lookup_by_table(table, ns_id);
d62a17ae 770 if (vrf_id == VRF_DEFAULT) {
771 if (!is_zebra_valid_kernel_table(table)
772 && !is_zebra_main_routing_table(table))
773 return 0;
774 }
775
5a3cf853
DS
776 if (rtm->rtm_flags & RTM_F_TRAP)
777 flags |= ZEBRA_FLAG_TRAPPED;
778 if (rtm->rtm_flags & RTM_F_OFFLOAD)
779 flags |= ZEBRA_FLAG_OFFLOADED;
0d32fbee
DS
780 if (rtm->rtm_flags & RTM_F_OFFLOAD_FAILED)
781 flags |= ZEBRA_FLAG_OFFLOAD_FAILED;
5a3cf853 782
d62a17ae 783 /* Route which inserted by Zebra. */
6a6d11a3 784 if (selfroute) {
d62a17ae 785 flags |= ZEBRA_FLAG_SELFROUTE;
38e40db1 786 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
915902cb 787 }
d62a17ae 788 if (tb[RTA_OIF])
789 index = *(int *)RTA_DATA(tb[RTA_OIF]);
790
791 if (tb[RTA_DST])
792 dest = RTA_DATA(tb[RTA_DST]);
793 else
794 dest = anyaddr;
795
796 if (tb[RTA_SRC])
797 src = RTA_DATA(tb[RTA_SRC]);
798 else
799 src = anyaddr;
800
801 if (tb[RTA_PREFSRC])
802 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
803
804 if (tb[RTA_GATEWAY])
805 gate = RTA_DATA(tb[RTA_GATEWAY]);
806
fcc89a9c
SW
807 if (tb[RTA_NH_ID])
808 nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
809
f19435a8
DS
810 if (tb[RTA_PRIORITY])
811 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
d62a17ae 812
4e40b6d6
KK
813#if defined(SUPPORT_REALMS)
814 if (tb[RTA_FLOW])
815 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
816#endif
817
f19435a8
DS
818 if (tb[RTA_METRICS]) {
819 struct rtattr *mxrta[RTAX_MAX + 1];
d62a17ae 820
996c9314 821 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
f19435a8 822 RTA_PAYLOAD(tb[RTA_METRICS]));
d62a17ae 823
f19435a8 824 if (mxrta[RTAX_MTU])
d7c0a89a 825 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
d62a17ae 826 }
827
828 if (rtm->rtm_family == AF_INET) {
829 p.family = AF_INET;
930571d2 830 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
e17d9b2d 831 zlog_err(
75829703 832 "Invalid destination prefix length: %u received from kernel route change",
930571d2 833 rtm->rtm_dst_len);
e17d9b2d 834 return -1;
930571d2 835 }
d62a17ae 836 memcpy(&p.u.prefix4, dest, 4);
837 p.prefixlen = rtm->rtm_dst_len;
838
1f610a1f 839 if (rtm->rtm_src_len != 0) {
9df414fe 840 flog_warn(
e914ccbe 841 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
2dbe669b
DA
842 "unsupported IPv4 sourcedest route (dest %pFX vrf %u)",
843 &p, vrf_id);
1f610a1f
CF
844 return 0;
845 }
930571d2 846
1f610a1f
CF
847 /* Force debug below to not display anything for source */
848 src_p.prefixlen = 0;
d62a17ae 849 } else if (rtm->rtm_family == AF_INET6) {
850 p.family = AF_INET6;
930571d2 851 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
e17d9b2d 852 zlog_err(
75829703 853 "Invalid destination prefix length: %u received from kernel route change",
930571d2 854 rtm->rtm_dst_len);
e17d9b2d 855 return -1;
930571d2 856 }
d62a17ae 857 memcpy(&p.u.prefix6, dest, 16);
858 p.prefixlen = rtm->rtm_dst_len;
859
860 src_p.family = AF_INET6;
930571d2 861 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
e17d9b2d 862 zlog_err(
75829703 863 "Invalid source prefix length: %u received from kernel route change",
930571d2 864 rtm->rtm_src_len);
e17d9b2d 865 return -1;
930571d2 866 }
d62a17ae 867 memcpy(&src_p.prefix, src, 16);
868 src_p.prefixlen = rtm->rtm_src_len;
deb28338
MS
869 } else {
870 /* We only handle the AFs we handle... */
871 if (IS_ZEBRA_DEBUG_KERNEL)
872 zlog_debug("%s: unknown address-family %u", __func__,
873 rtm->rtm_family);
874 return 0;
d62a17ae 875 }
876
25715c7e
DS
877 /*
878 * For ZEBRA_ROUTE_KERNEL types:
879 *
880 * The metric/priority of the route received from the kernel
881 * is a 32 bit number. We are going to interpret the high
882 * order byte as the Admin Distance and the low order 3 bytes
883 * as the metric.
884 *
885 * This will allow us to do two things:
886 * 1) Allow the creation of kernel routes that can be
887 * overridden by zebra.
888 * 2) Allow the old behavior for 'most' kernel route types
889 * if a user enters 'ip route ...' v4 routes get a metric
890 * of 0 and v6 routes get a metric of 1024. Both of these
891 * values will end up with a admin distance of 0, which
892 * will cause them to win for the purposes of zebra.
893 */
894 if (proto == ZEBRA_ROUTE_KERNEL) {
895 distance = (metric >> 24) & 0xFF;
996c9314 896 metric = (metric & 0x00FFFFFF);
25715c7e
DS
897 }
898
d62a17ae 899 if (IS_ZEBRA_DEBUG_KERNEL) {
d62a17ae 900 char buf2[PREFIX_STRLEN];
2dbe669b 901
bd47f3a3 902 zlog_debug(
2dbe669b
DA
903 "%s %pFX%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d",
904 nl_msg_type_to_str(h->nlmsg_type), &p,
bd47f3a3
JU
905 src_p.prefixlen ? " from " : "",
906 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
907 : "",
908 vrf_id_to_name(vrf_id), vrf_id, table, metric,
909 distance);
d62a17ae 910 }
911
912 afi_t afi = AFI_IP;
913 if (rtm->rtm_family == AF_INET6)
914 afi = AFI_IP6;
915
916 if (h->nlmsg_type == RTM_NEWROUTE) {
8795f904 917
fd36be7e 918 if (!tb[RTA_MULTIPATH]) {
77a44d94 919 struct nexthop nh = {0};
8795f904 920
77a44d94
SW
921 if (!nhe_id) {
922 nh = parse_nexthop_unicast(
923 ns_id, rtm, tb, bh_type, index, prefsrc,
20822f9d 924 gate, afi, vrf_id);
87da6a60 925 }
4a7371e9 926 rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
8032b717 927 &src_p, &nh, nhe_id, table, metric, mtu,
c6eee91f 928 distance, tag, startup);
fd36be7e 929 } else {
d62a17ae 930 /* This is a multipath route */
d62a17ae 931 struct route_entry *re;
0eb97b86 932 struct nexthop_group *ng = NULL;
d62a17ae 933 struct rtnexthop *rtnh =
934 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
d62a17ae 935
936 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
915902cb 937 re->type = proto;
25715c7e 938 re->distance = distance;
d62a17ae 939 re->flags = flags;
940 re->metric = metric;
941 re->mtu = mtu;
942 re->vrf_id = vrf_id;
943 re->table = table;
98572489 944 re->uptime = monotime(NULL);
4e40b6d6 945 re->tag = tag;
bbb322f2 946 re->nhe_id = nhe_id;
3c04071d 947
20822f9d 948 if (!nhe_id) {
0eb97b86
MS
949 uint8_t nhop_num;
950
951 /* Use temporary list of nexthops; parse
952 * message payload's nexthops.
953 */
954 ng = nexthop_group_new();
955 nhop_num =
20822f9d 956 parse_multipath_nexthops_unicast(
0eb97b86 957 ns_id, ng, rtm, rtnh, tb,
20822f9d
SW
958 prefsrc, vrf_id);
959
960 zserv_nexthop_num_warn(
961 __func__, (const struct prefix *)&p,
962 nhop_num);
0eb97b86
MS
963
964 if (nhop_num == 0) {
965 nexthop_group_delete(&ng);
966 ng = NULL;
967 }
d62a17ae 968 }
969
0eb97b86 970 if (nhe_id || ng)
1f610a1f 971 rib_add_multipath(afi, SAFI_UNICAST, &p,
c6eee91f 972 &src_p, re, ng, startup);
20822f9d
SW
973 else
974 XFREE(MTYPE_RE, re);
d62a17ae 975 }
976 } else {
bc541126
SW
977 if (nhe_id) {
978 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
979 &p, &src_p, NULL, nhe_id, table, metric,
3ceae22b 980 distance, true);
bc541126
SW
981 } else {
982 if (!tb[RTA_MULTIPATH]) {
983 struct nexthop nh;
760f39dc
HS
984
985 nh = parse_nexthop_unicast(
986 ns_id, rtm, tb, bh_type, index, prefsrc,
987 gate, afi, vrf_id);
bc541126
SW
988 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
989 flags, &p, &src_p, &nh, 0, table,
3ceae22b 990 metric, distance, true);
8ba5bd58 991 } else {
bc541126
SW
992 /* XXX: need to compare the entire list of
993 * nexthops here for NLM_F_APPEND stupidity */
994 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
995 flags, &p, &src_p, NULL, 0, table,
3ceae22b 996 metric, distance, true);
8ba5bd58 997 }
d62a17ae 998 }
999 }
1000
1001 return 0;
718e3744 1002}
1003
e3be0432
DS
1004static struct mcast_route_data *mroute = NULL;
1005
2414abd3 1006static int netlink_route_change_read_multicast(struct nlmsghdr *h,
d62a17ae 1007 ns_id_t ns_id, int startup)
565fdc75 1008{
d62a17ae 1009 int len;
1010 struct rtmsg *rtm;
1011 struct rtattr *tb[RTA_MAX + 1];
1012 struct mcast_route_data *m;
1013 struct mcast_route_data mr;
1014 int iif = 0;
1015 int count;
1016 int oif[256];
1017 int oif_count = 0;
d62a17ae 1018 char oif_list[256] = "\0";
78dd30b2 1019 vrf_id_t vrf;
43b5cc5e 1020 int table;
d62a17ae 1021
1022 if (mroute)
1023 m = mroute;
1024 else {
1025 memset(&mr, 0, sizeof(mr));
1026 m = &mr;
1027 }
1028
1029 rtm = NLMSG_DATA(h);
1030
1031 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
1032
d62a17ae 1033 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
90d82769 1034
43b5cc5e
DS
1035 if (tb[RTA_TABLE])
1036 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
1037 else
1038 table = rtm->rtm_table;
1039
78dd30b2 1040 vrf = vrf_lookup_by_table(table, ns_id);
43b5cc5e 1041
d62a17ae 1042 if (tb[RTA_IIF])
1043 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
1044
1045 if (tb[RTA_SRC])
bd8b9272 1046 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
d62a17ae 1047
1048 if (tb[RTA_DST])
bd8b9272 1049 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
d62a17ae 1050
62819462 1051 if (tb[RTA_EXPIRES])
d62a17ae 1052 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
1053
1054 if (tb[RTA_MULTIPATH]) {
1055 struct rtnexthop *rtnh =
1056 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
1057
1058 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
1059 for (;;) {
1060 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
1061 break;
1062
1063 oif[oif_count] = rtnh->rtnh_ifindex;
1064 oif_count++;
1065
3c04071d
SW
1066 if (rtnh->rtnh_len == 0)
1067 break;
1068
d62a17ae 1069 len -= NLMSG_ALIGN(rtnh->rtnh_len);
1070 rtnh = RTNH_NEXT(rtnh);
1071 }
1072 }
1073
1074 if (IS_ZEBRA_DEBUG_KERNEL) {
822c9af2
SW
1075 struct interface *ifp = NULL;
1076 struct zebra_vrf *zvrf = NULL;
1077
d62a17ae 1078 for (count = 0; count < oif_count; count++) {
1079 ifp = if_lookup_by_index(oif[count], vrf);
1080 char temp[256];
1081
772270f3
QY
1082 snprintf(temp, sizeof(temp), "%s(%d) ",
1083 ifp ? ifp->name : "Unknown", oif[count]);
eab4a5c2 1084 strlcat(oif_list, temp, sizeof(oif_list));
d62a17ae 1085 }
822c9af2 1086 zvrf = zebra_vrf_lookup_by_id(vrf);
d62a17ae 1087 ifp = if_lookup_by_index(iif, vrf);
822c9af2 1088 zlog_debug(
9bcef951 1089 "MCAST VRF: %s(%d) %s (%pI4,%pI4) IIF: %s(%d) OIF: %s jiffies: %lld",
bd47f3a3 1090 zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type),
9bcef951
MS
1091 &m->sg.src, &m->sg.grp, ifp ? ifp->name : "Unknown",
1092 iif, oif_list,
822c9af2 1093 m->lastused);
90d82769 1094 }
d62a17ae 1095 return 0;
565fdc75
DS
1096}
1097
2414abd3 1098int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
565fdc75 1099{
d62a17ae 1100 int len;
d62a17ae 1101 struct rtmsg *rtm;
1102
1103 rtm = NLMSG_DATA(h);
1104
1105 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
1106 /* If this is not route add/delete message print warning. */
9165c5f5 1107 zlog_debug("Kernel message: %s NS %u",
87b5d1b0 1108 nl_msg_type_to_str(h->nlmsg_type), ns_id);
d62a17ae 1109 return 0;
1110 }
1111
c25e2f1a
DS
1112 if (!(rtm->rtm_family == AF_INET ||
1113 rtm->rtm_family == AF_INET6 ||
1114 rtm->rtm_family == RTNL_FAMILY_IPMR )) {
9df414fe 1115 flog_warn(
e914ccbe 1116 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
1117 "Invalid address family: %u received from kernel route change: %s",
1118 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
1119 return 0;
1120 }
1121
d62a17ae 1122 /* Connected route. */
1123 if (IS_ZEBRA_DEBUG_KERNEL)
78dd30b2 1124 zlog_debug("%s %s %s proto %s NS %u",
d62a17ae 1125 nl_msg_type_to_str(h->nlmsg_type),
1126 nl_family_to_str(rtm->rtm_family),
1127 nl_rttype_to_str(rtm->rtm_type),
78dd30b2 1128 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
d62a17ae 1129
d62a17ae 1130
1131 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618 1132 if (len < 0) {
15569c58
DA
1133 zlog_err(
1134 "%s: Message received from netlink is of a broken size: %d %zu",
1135 __func__, h->nlmsg_len,
1136 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 1137 return -1;
9bdf8618 1138 }
d62a17ae 1139
e655a03c 1140 if (rtm->rtm_type == RTN_MULTICAST)
2414abd3 1141 netlink_route_change_read_multicast(h, ns_id, startup);
e655a03c 1142 else
2414abd3 1143 netlink_route_change_read_unicast(h, ns_id, startup);
d62a17ae 1144 return 0;
565fdc75
DS
1145}
1146
289602d7 1147/* Request for specific route information from the kernel */
d62a17ae 1148static int netlink_request_route(struct zebra_ns *zns, int family, int type)
289602d7 1149{
d62a17ae 1150 struct {
1151 struct nlmsghdr n;
1152 struct rtmsg rtm;
1153 } req;
1154
1155 /* Form the request, specifying filter (rtattr) if needed. */
1156 memset(&req, 0, sizeof(req));
1157 req.n.nlmsg_type = type;
718f9b0f 1158 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 1159 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1160 req.rtm.rtm_family = family;
1161
fd3f8e52 1162 return netlink_request(&zns->netlink_cmd, &req);
289602d7 1163}
1164
718e3744 1165/* Routing table read function using netlink interface. Only called
1166 bootstrap time. */
d62a17ae 1167int netlink_route_read(struct zebra_ns *zns)
718e3744 1168{
d62a17ae 1169 int ret;
85a75f1e
MS
1170 struct zebra_dplane_info dp_info;
1171
1172 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 1173
1174 /* Get IPv4 routing table. */
1175 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
1176 if (ret < 0)
1177 return ret;
1178 ret = netlink_parse_info(netlink_route_change_read_unicast,
9bfadae8 1179 &zns->netlink_cmd, &dp_info, 0, true);
d62a17ae 1180 if (ret < 0)
1181 return ret;
1182
1183 /* Get IPv6 routing table. */
1184 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
1185 if (ret < 0)
1186 return ret;
1187 ret = netlink_parse_info(netlink_route_change_read_unicast,
9bfadae8 1188 &zns->netlink_cmd, &dp_info, 0, true);
d62a17ae 1189 if (ret < 0)
1190 return ret;
1191
1192 return 0;
718e3744 1193}
1194
0be6e7d7
JU
1195/*
1196 * The function returns true if the gateway info could be added
1197 * to the message, otherwise false is returned.
1198 */
1199static bool _netlink_route_add_gateway_info(uint8_t route_family,
312a6bee
JU
1200 uint8_t gw_family,
1201 struct nlmsghdr *nlmsg,
1202 size_t req_size, int bytelen,
1203 const struct nexthop *nexthop)
40c7bdb0 1204{
d62a17ae 1205 if (route_family == AF_MPLS) {
1206 struct gw_family_t gw_fam;
1207
1208 gw_fam.family = gw_family;
1209 if (gw_family == AF_INET)
1210 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1211 else
1212 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
0be6e7d7
JU
1213 if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family,
1214 bytelen + 2))
1215 return false;
d62a17ae 1216 } else {
92d6f769
K
1217 if (!(nexthop->rparent
1218 && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) {
1219 if (gw_family == AF_INET) {
1220 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1221 &nexthop->gate.ipv4, bytelen))
1222 return false;
1223 } else {
1224 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1225 &nexthop->gate.ipv6, bytelen))
1226 return false;
1227 }
0be6e7d7 1228 }
d62a17ae 1229 }
0be6e7d7
JU
1230
1231 return true;
40c7bdb0 1232}
1233
b7537db6
SW
1234static int build_label_stack(struct mpls_label_stack *nh_label,
1235 mpls_lse_t *out_lse, char *label_buf,
1236 size_t label_buf_size)
1237{
1238 char label_buf1[20];
1239 int num_labels = 0;
1240
1241 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1242 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1243 continue;
1244
1245 if (IS_ZEBRA_DEBUG_KERNEL) {
1246 if (!num_labels)
7533cad7
QY
1247 snprintf(label_buf, label_buf_size, "label %u",
1248 nh_label->label[i]);
b7537db6 1249 else {
772270f3
QY
1250 snprintf(label_buf1, sizeof(label_buf1), "/%u",
1251 nh_label->label[i]);
b7537db6
SW
1252 strlcat(label_buf, label_buf1, label_buf_size);
1253 }
1254 }
1255
1256 out_lse[num_labels] =
1257 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1258 num_labels++;
1259 }
1260
1261 return num_labels;
1262}
1263
a757997c
JU
1264static bool _netlink_route_encode_label_info(struct mpls_label_stack *nh_label,
1265 struct nlmsghdr *nlmsg,
1266 size_t buflen, struct rtmsg *rtmsg,
1267 char *label_buf,
1268 size_t label_buf_size)
fa713d9e 1269{
d62a17ae 1270 mpls_lse_t out_lse[MPLS_MAX_LABELS];
a757997c 1271 int num_labels;
bd47f3a3 1272
d62a17ae 1273 /*
1274 * label_buf is *only* currently used within debugging.
1275 * As such when we assign it we are guarding it inside
1276 * a debug test. If you want to change this make sure
1277 * you fix this assumption
1278 */
1279 label_buf[0] = '\0';
d62a17ae 1280
a757997c
JU
1281 num_labels =
1282 build_label_stack(nh_label, out_lse, label_buf, label_buf_size);
fa712963
RW
1283
1284 if (num_labels) {
1285 /* Set the BoS bit */
1286 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1287
0be6e7d7 1288 if (rtmsg->rtm_family == AF_MPLS) {
a757997c 1289 if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse,
0be6e7d7
JU
1290 num_labels * sizeof(mpls_lse_t)))
1291 return false;
1292 } else {
fa712963 1293 struct rtattr *nest;
fa712963 1294
a757997c
JU
1295 if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
1296 LWTUNNEL_ENCAP_MPLS))
0be6e7d7
JU
1297 return false;
1298
a757997c 1299 nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
0be6e7d7
JU
1300 if (!nest)
1301 return false;
1302
a757997c 1303 if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST,
0be6e7d7
JU
1304 &out_lse,
1305 num_labels * sizeof(mpls_lse_t)))
1306 return false;
312a6bee 1307 nl_attr_nest_end(nlmsg, nest);
66d42727 1308 }
0aabccc0 1309 }
fa713d9e 1310
a757997c
JU
1311 return true;
1312}
1313
1314static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop,
1315 int family,
1316 struct nlmsghdr *nlmsg,
1317 size_t buflen, int bytelen)
1318{
1319 if (family == AF_INET) {
1320 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1321 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1322 &nexthop->rmap_src.ipv4, bytelen))
1323 return false;
1324 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1325 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1326 &nexthop->src.ipv4, bytelen))
1327 return false;
1328 }
1329 } else if (family == AF_INET6) {
1330 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1331 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1332 &nexthop->rmap_src.ipv6, bytelen))
1333 return false;
1334 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1335 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1336 &nexthop->src.ipv6, bytelen))
1337 return false;
1338 }
1339 }
1340
1341 return true;
1342}
1343
f463eac7
HS
1344static ssize_t fill_seg6ipt_encap(char *buffer, size_t buflen,
1345 const struct in6_addr *seg)
76fb7ae4
HS
1346{
1347 struct seg6_iptunnel_encap *ipt;
1348 struct ipv6_sr_hdr *srh;
1349 const size_t srhlen = 24;
4df9d859 1350
f463eac7
HS
1351 /*
1352 * Caution: Support only SINGLE-SID, not MULTI-SID
1353 * This function only supports the case where segs represents
1354 * a single SID. If you want to extend the SRv6 functionality,
1355 * you should improve the Boundary Check.
1356 * Ex. In case of set a SID-List include multiple-SIDs as an
1357 * argument of the Transit Behavior, we must support variable
1358 * boundary check for buflen.
1359 */
1360 if (buflen < (sizeof(struct seg6_iptunnel_encap) +
1361 sizeof(struct ipv6_sr_hdr) + 16))
1362 return -1;
1363
76fb7ae4
HS
1364 memset(buffer, 0, buflen);
1365
1366 ipt = (struct seg6_iptunnel_encap *)buffer;
1367 ipt->mode = SEG6_IPTUN_MODE_ENCAP;
1368 srh = ipt->srh;
1369 srh->hdrlen = (srhlen >> 3) - 1;
1370 srh->type = 4;
1371 srh->segments_left = 0;
1372 srh->first_segment = 0;
1373 memcpy(&srh->segments[0], seg, sizeof(struct in6_addr));
1374
1375 return srhlen + 4;
1376}
1377
a757997c
JU
1378/* This function takes a nexthop as argument and adds
1379 * the appropriate netlink attributes to an existing
1380 * netlink message.
1381 *
1382 * @param routedesc: Human readable description of route type
1383 * (direct/recursive, single-/multipath)
1384 * @param bytelen: Length of addresses in bytes.
1385 * @param nexthop: Nexthop information
1386 * @param nlmsg: nlmsghdr structure to fill in.
1387 * @param req_size: The size allocated for the message.
1388 *
1389 * The function returns true if the nexthop could be added
1390 * to the message, otherwise false is returned.
1391 */
1392static bool _netlink_route_build_singlepath(const struct prefix *p,
1393 const char *routedesc, int bytelen,
1394 const struct nexthop *nexthop,
1395 struct nlmsghdr *nlmsg,
1396 struct rtmsg *rtmsg,
1397 size_t req_size, int cmd)
1398{
1399
1400 char label_buf[256];
1401 struct vrf *vrf;
1402 char addrstr[INET6_ADDRSTRLEN];
1403
1404 assert(nexthop);
1405
1406 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1407
1408 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1409 req_size, rtmsg, label_buf,
1410 sizeof(label_buf)))
1411 return false;
1412
eab0f8f0
HS
1413 if (nexthop->nh_srv6) {
1414 if (nexthop->nh_srv6->seg6local_action !=
1415 ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
1416 struct rtattr *nest;
1417 const struct seg6local_context *ctx;
52026569 1418
eab0f8f0
HS
1419 ctx = &nexthop->nh_srv6->seg6local_ctx;
1420 if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1421 LWTUNNEL_ENCAP_SEG6_LOCAL))
52026569 1422 return false;
eab0f8f0
HS
1423
1424 nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1425 if (!nest)
52026569 1426 return false;
eab0f8f0
HS
1427
1428 switch (nexthop->nh_srv6->seg6local_action) {
1429 case ZEBRA_SEG6_LOCAL_ACTION_END:
1430 if (!nl_attr_put32(nlmsg, req_size,
1431 SEG6_LOCAL_ACTION,
1432 SEG6_LOCAL_ACTION_END))
1433 return false;
1434 break;
1435 case ZEBRA_SEG6_LOCAL_ACTION_END_X:
1436 if (!nl_attr_put32(nlmsg, req_size,
1437 SEG6_LOCAL_ACTION,
1438 SEG6_LOCAL_ACTION_END_X))
1439 return false;
1440 if (!nl_attr_put(nlmsg, req_size,
1441 SEG6_LOCAL_NH6, &ctx->nh6,
1442 sizeof(struct in6_addr)))
1443 return false;
1444 break;
1445 case ZEBRA_SEG6_LOCAL_ACTION_END_T:
1446 if (!nl_attr_put32(nlmsg, req_size,
1447 SEG6_LOCAL_ACTION,
1448 SEG6_LOCAL_ACTION_END_T))
1449 return false;
1450 if (!nl_attr_put32(nlmsg, req_size,
1451 SEG6_LOCAL_TABLE,
1452 ctx->table))
1453 return false;
1454 break;
1455 case ZEBRA_SEG6_LOCAL_ACTION_END_DX4:
1456 if (!nl_attr_put32(nlmsg, req_size,
1457 SEG6_LOCAL_ACTION,
1458 SEG6_LOCAL_ACTION_END_DX4))
1459 return false;
1460 if (!nl_attr_put(nlmsg, req_size,
1461 SEG6_LOCAL_NH4, &ctx->nh4,
1462 sizeof(struct in_addr)))
1463 return false;
1464 break;
1465 case ZEBRA_SEG6_LOCAL_ACTION_END_DT6:
1466 if (!nl_attr_put32(nlmsg, req_size,
1467 SEG6_LOCAL_ACTION,
1468 SEG6_LOCAL_ACTION_END_DT6))
1469 return false;
1470 if (!nl_attr_put32(nlmsg, req_size,
1471 SEG6_LOCAL_TABLE,
1472 ctx->table))
1473 return false;
1474 break;
7eab60a7
RS
1475 case ZEBRA_SEG6_LOCAL_ACTION_END_DT4:
1476 if (!nl_attr_put32(nlmsg, req_size,
1477 SEG6_LOCAL_ACTION,
1478 SEG6_LOCAL_ACTION_END_DT4))
1479 return false;
1480 if (!nl_attr_put32(nlmsg, req_size,
1481 SEG6_LOCAL_VRFTABLE,
1482 ctx->table))
1483 return false;
1484 break;
eab0f8f0
HS
1485 default:
1486 zlog_err("%s: unsupport seg6local behaviour action=%u",
1487 __func__,
1488 nexthop->nh_srv6->seg6local_action);
0a543b79 1489 return false;
eab0f8f0
HS
1490 }
1491 nl_attr_nest_end(nlmsg, nest);
1492 }
1493
1494 if (!sid_zero(&nexthop->nh_srv6->seg6_segs)) {
1495 char tun_buf[4096];
1496 ssize_t tun_len;
1497 struct rtattr *nest;
1498
1499 if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1500 LWTUNNEL_ENCAP_SEG6))
52026569 1501 return false;
eab0f8f0
HS
1502 nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1503 if (!nest)
52026569 1504 return false;
eab0f8f0
HS
1505 tun_len = fill_seg6ipt_encap(tun_buf, sizeof(tun_buf),
1506 &nexthop->nh_srv6->seg6_segs);
1507 if (tun_len < 0)
52026569 1508 return false;
eab0f8f0
HS
1509 if (!nl_attr_put(nlmsg, req_size, SEG6_IPTUNNEL_SRH,
1510 tun_buf, tun_len))
52026569 1511 return false;
eab0f8f0 1512 nl_attr_nest_end(nlmsg, nest);
8689b25a 1513 }
76fb7ae4
HS
1514 }
1515
d62a17ae 1516 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1517 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1518
002e5c43 1519 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
d62a17ae 1520 rtmsg->rtm_flags |= RTNH_F_ONLINK;
0be6e7d7
JU
1521 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1522 return false;
1523 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1524 return false;
d62a17ae 1525
a757997c
JU
1526 if (cmd == RTM_NEWROUTE) {
1527 if (!_netlink_route_encode_nexthop_src(
1528 nexthop, AF_INET, nlmsg, req_size, bytelen))
0be6e7d7
JU
1529 return false;
1530 }
d62a17ae 1531
1532 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315
RZ
1533 zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1534 __func__, routedesc, p, ipv4_ll_buf,
1535 label_buf, nexthop->ifindex,
1536 VRF_LOGNAME(vrf), nexthop->vrf_id);
0be6e7d7 1537 return true;
0aabccc0
DD
1538 }
1539
d62a17ae 1540 if (nexthop->type == NEXTHOP_TYPE_IPV4
1541 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1542 /* Send deletes to the kernel without specifying the next-hop */
0be6e7d7
JU
1543 if (cmd != RTM_DELROUTE) {
1544 if (!_netlink_route_add_gateway_info(
1545 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1546 bytelen, nexthop))
1547 return false;
1548 }
d62a17ae 1549
1550 if (cmd == RTM_NEWROUTE) {
a757997c
JU
1551 if (!_netlink_route_encode_nexthop_src(
1552 nexthop, AF_INET, nlmsg, req_size, bytelen))
1553 return false;
d62a17ae 1554 }
1555
9266b315
RZ
1556 if (IS_ZEBRA_DEBUG_KERNEL) {
1557 inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr,
1558 sizeof(addrstr));
1559 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1560 __func__, routedesc, p, addrstr, label_buf,
1561 nexthop->ifindex, VRF_LOGNAME(vrf),
1562 nexthop->vrf_id);
1563 }
0aabccc0 1564 }
fa713d9e 1565
d62a17ae 1566 if (nexthop->type == NEXTHOP_TYPE_IPV6
1567 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
0be6e7d7
JU
1568 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1569 AF_INET6, nlmsg, req_size,
1570 bytelen, nexthop))
1571 return false;
d62a17ae 1572
1573 if (cmd == RTM_NEWROUTE) {
a757997c
JU
1574 if (!_netlink_route_encode_nexthop_src(
1575 nexthop, AF_INET6, nlmsg, req_size,
1576 bytelen))
1577 return false;
d62a17ae 1578 }
fa713d9e 1579
9266b315
RZ
1580 if (IS_ZEBRA_DEBUG_KERNEL) {
1581 inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr,
1582 sizeof(addrstr));
1583 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1584 __func__, routedesc, p, addrstr, label_buf,
1585 nexthop->ifindex, VRF_LOGNAME(vrf),
1586 nexthop->vrf_id);
1587 }
d62a17ae 1588 }
5e210522
DS
1589
1590 /*
1591 * We have the ifindex so we should always send it
1592 * This is especially useful if we are doing route
1593 * leaking.
1594 */
0be6e7d7
JU
1595 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
1596 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1597 return false;
1598 }
d62a17ae 1599
275565fb 1600 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
d62a17ae 1601 if (cmd == RTM_NEWROUTE) {
a757997c
JU
1602 if (!_netlink_route_encode_nexthop_src(
1603 nexthop, AF_INET, nlmsg, req_size, bytelen))
1604 return false;
d62a17ae 1605 }
fa713d9e 1606
d62a17ae 1607 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315
RZ
1608 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1609 __func__, routedesc, p, nexthop->ifindex,
1610 VRF_LOGNAME(vrf), nexthop->vrf_id);
0aabccc0 1611 }
0be6e7d7
JU
1612
1613 return true;
fa713d9e
CF
1614}
1615
1616/* This function takes a nexthop as argument and
312a6bee 1617 * appends to the given netlink msg. If the nexthop
fa713d9e
CF
1618 * defines a preferred source, the src parameter
1619 * will be modified to point to that src, otherwise
1620 * it will be kept unmodified.
1621 *
1622 * @param routedesc: Human readable description of route type
1623 * (direct/recursive, single-/multipath)
1624 * @param bytelen: Length of addresses in bytes.
1625 * @param nexthop: Nexthop information
312a6bee
JU
1626 * @param nlmsg: nlmsghdr structure to fill in.
1627 * @param req_size: The size allocated for the message.
fa713d9e
CF
1628 * @param src: pointer pointing to a location where
1629 * the prefsrc should be stored.
0be6e7d7
JU
1630 *
1631 * The function returns true if the nexthop could be added
1632 * to the message, otherwise false is returned.
fa713d9e 1633 */
0be6e7d7 1634static bool _netlink_route_build_multipath(const struct prefix *p,
312a6bee
JU
1635 const char *routedesc, int bytelen,
1636 const struct nexthop *nexthop,
1637 struct nlmsghdr *nlmsg,
1638 size_t req_size, struct rtmsg *rtmsg,
1639 const union g_addr **src)
fa713d9e 1640{
9a62e84b 1641 char label_buf[256];
bd47f3a3 1642 struct vrf *vrf;
312a6bee 1643 struct rtnexthop *rtnh;
d62a17ae 1644
312a6bee 1645 rtnh = nl_attr_rtnh(nlmsg, req_size);
0be6e7d7
JU
1646 if (rtnh == NULL)
1647 return false;
d62a17ae 1648
b7537db6
SW
1649 assert(nexthop);
1650
bd47f3a3
JU
1651 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1652
a757997c
JU
1653 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1654 req_size, rtmsg, label_buf,
1655 sizeof(label_buf)))
1656 return false;
d62a17ae 1657
1658 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1659 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1660
002e5c43 1661 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
d62a17ae 1662 rtnh->rtnh_flags |= RTNH_F_ONLINK;
a757997c 1663 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
0be6e7d7 1664 return false;
d62a17ae 1665 rtnh->rtnh_ifindex = nexthop->ifindex;
8d27e1aa 1666 if (nexthop->weight)
1667 rtnh->rtnh_hops = nexthop->weight - 1;
d62a17ae 1668
975a328e 1669 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1670 *src = &nexthop->rmap_src;
975a328e 1671 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1672 *src = &nexthop->src;
1673
1674 if (IS_ZEBRA_DEBUG_KERNEL)
1675 zlog_debug(
9266b315
RZ
1676 "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1677 __func__, routedesc, p, ipv4_ll_buf, label_buf,
bd47f3a3
JU
1678 nexthop->ifindex, VRF_LOGNAME(vrf),
1679 nexthop->vrf_id);
312a6bee 1680 nl_attr_rtnh_end(nlmsg, rtnh);
0be6e7d7 1681 return true;
d62a17ae 1682 }
1683
1684 if (nexthop->type == NEXTHOP_TYPE_IPV4
1685 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
0be6e7d7
JU
1686 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET,
1687 nlmsg, req_size, bytelen,
1688 nexthop))
1689 return false;
1690
975a328e 1691 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1692 *src = &nexthop->rmap_src;
975a328e 1693 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1694 *src = &nexthop->src;
1695
a50404aa
RZ
1696 if (IS_ZEBRA_DEBUG_KERNEL)
1697 zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)",
1698 __func__, routedesc, p, &nexthop->gate.ipv4,
1699 label_buf, nexthop->ifindex,
1700 VRF_LOGNAME(vrf), nexthop->vrf_id);
d62a17ae 1701 }
1702 if (nexthop->type == NEXTHOP_TYPE_IPV6
1703 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
0be6e7d7
JU
1704 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1705 AF_INET6, nlmsg, req_size,
1706 bytelen, nexthop))
1707 return false;
d62a17ae 1708
1709 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1710 *src = &nexthop->rmap_src;
1711 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1712 *src = &nexthop->src;
1713
a50404aa
RZ
1714 if (IS_ZEBRA_DEBUG_KERNEL)
1715 zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)",
1716 __func__, routedesc, p, &nexthop->gate.ipv6,
1717 label_buf, nexthop->ifindex,
1718 VRF_LOGNAME(vrf), nexthop->vrf_id);
d62a17ae 1719 }
5e210522
DS
1720
1721 /*
1722 * We have figured out the ifindex so we should always send it
1723 * This is especially useful if we are doing route
1724 * leaking.
1725 */
1726 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1727 rtnh->rtnh_ifindex = nexthop->ifindex;
1728
d62a17ae 1729 /* ifindex */
275565fb 1730 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
975a328e 1731 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1732 *src = &nexthop->rmap_src;
975a328e 1733 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1734 *src = &nexthop->src;
1735
1736 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315
RZ
1737 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1738 __func__, routedesc, p, nexthop->ifindex,
1739 VRF_LOGNAME(vrf), nexthop->vrf_id);
d62a17ae 1740 }
df7fb580
DS
1741
1742 if (nexthop->weight)
1743 rtnh->rtnh_hops = nexthop->weight - 1;
0be6e7d7 1744
312a6bee 1745 nl_attr_rtnh_end(nlmsg, rtnh);
0be6e7d7 1746 return true;
fa713d9e
CF
1747}
1748
f2595bd5
DS
1749static inline bool
1750_netlink_mpls_build_singlepath(const struct prefix *p, const char *routedesc,
1751 const struct zebra_nhlfe *nhlfe,
1752 struct nlmsghdr *nlmsg, struct rtmsg *rtmsg,
1753 size_t req_size, int cmd)
40c7bdb0 1754{
d62a17ae 1755 int bytelen;
d7c0a89a 1756 uint8_t family;
40c7bdb0 1757
d62a17ae 1758 family = NHLFE_FAMILY(nhlfe);
1759 bytelen = (family == AF_INET ? 4 : 16);
0be6e7d7
JU
1760 return _netlink_route_build_singlepath(p, routedesc, bytelen,
1761 nhlfe->nexthop, nlmsg, rtmsg,
1762 req_size, cmd);
40c7bdb0 1763}
1764
1765
0be6e7d7 1766static inline bool
9a0132a5 1767_netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc,
f2595bd5 1768 const struct zebra_nhlfe *nhlfe,
312a6bee
JU
1769 struct nlmsghdr *nlmsg, size_t req_size,
1770 struct rtmsg *rtmsg, const union g_addr **src)
40c7bdb0 1771{
d62a17ae 1772 int bytelen;
d7c0a89a 1773 uint8_t family;
40c7bdb0 1774
d62a17ae 1775 family = NHLFE_FAMILY(nhlfe);
1776 bytelen = (family == AF_INET ? 4 : 16);
0be6e7d7
JU
1777 return _netlink_route_build_multipath(p, routedesc, bytelen,
1778 nhlfe->nexthop, nlmsg, req_size,
1779 rtmsg, src);
40c7bdb0 1780}
1781
d7c0a89a 1782static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
40c7bdb0 1783{
d62a17ae 1784 if (IS_ZEBRA_DEBUG_KERNEL)
0be6e7d7
JU
1785 zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20",
1786 routedesc, nl_msg_type_to_str(cmd), label);
fa713d9e
CF
1787}
1788
05657ec2
PG
1789static int netlink_neigh_update(int cmd, int ifindex, void *addr, char *lla,
1790 int llalen, ns_id_t ns_id, uint8_t family,
1791 bool permanent, uint8_t protocol)
5c610faf 1792{
d62a17ae 1793 struct {
1794 struct nlmsghdr n;
1795 struct ndmsg ndm;
1796 char buf[256];
1797 } req;
5c610faf 1798
5895d33f 1799 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
8f7d9fc0 1800
5605ecfc 1801 memset(&req, 0, sizeof(req));
5c610faf 1802
d62a17ae 1803 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1804 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1805 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1806 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
a55ba23f 1807
05657ec2 1808 req.ndm.ndm_family = family;
d62a17ae 1809 req.ndm.ndm_ifindex = ifindex;
1810 req.ndm.ndm_type = RTN_UNICAST;
05657ec2
PG
1811 if (cmd == RTM_NEWNEIGH) {
1812 if (!permanent)
1813 req.ndm.ndm_state = NUD_REACHABLE;
1814 else
1815 req.ndm.ndm_state = NUD_PERMANENT;
1816 } else
1817 req.ndm.ndm_state = NUD_FAILED;
5c610faf 1818
312a6bee
JU
1819 nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol,
1820 sizeof(protocol));
05657ec2 1821 req.ndm.ndm_type = RTN_UNICAST;
df948efc
PG
1822 nl_attr_put(&req.n, sizeof(req), NDA_DST, addr,
1823 family2addrsize(family));
05657ec2
PG
1824 if (lla)
1825 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
5c610faf 1826
d62a17ae 1827 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
9bfadae8 1828 false);
5c610faf
DS
1829}
1830
762288f5
SW
1831static bool nexthop_set_src(const struct nexthop *nexthop, int family,
1832 union g_addr *src)
1833{
1834 if (family == AF_INET) {
1835 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1836 src->ipv4 = nexthop->rmap_src.ipv4;
1837 return true;
1838 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1839 src->ipv4 = nexthop->src.ipv4;
1840 return true;
1841 }
1842 } else if (family == AF_INET6) {
1843 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1844 src->ipv6 = nexthop->rmap_src.ipv6;
1845 return true;
1846 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1847 src->ipv6 = nexthop->src.ipv6;
1848 return true;
1849 }
1850 }
1851
1852 return false;
1853}
1854
0be6e7d7
JU
1855/*
1856 * The function returns true if the attribute could be added
1857 * to the message, otherwise false is returned.
1858 */
1859static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen,
1860 struct nexthop *nh)
f2a0ba3a
RZ
1861{
1862 struct rtattr *nest;
1863
1864 switch (nh->nh_encap_type) {
1865 case NET_VXLAN:
a757997c 1866 if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type))
0be6e7d7 1867 return false;
f2a0ba3a 1868
312a6bee 1869 nest = nl_attr_nest(n, nlen, RTA_ENCAP);
0be6e7d7
JU
1870 if (!nest)
1871 return false;
1872
1873 if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */,
1874 nh->nh_encap.vni))
1875 return false;
312a6bee 1876 nl_attr_nest_end(n, nest);
f2a0ba3a
RZ
1877 break;
1878 }
0be6e7d7
JU
1879
1880 return true;
f2a0ba3a
RZ
1881}
1882
7cdb1a84
MS
1883/*
1884 * Routing table change via netlink interface, using a dataplane context object
0be6e7d7
JU
1885 *
1886 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
1887 * otherwise the number of bytes written to buf.
7cdb1a84 1888 */
0be6e7d7
JU
1889ssize_t netlink_route_multipath_msg_encode(int cmd,
1890 struct zebra_dplane_ctx *ctx,
1891 uint8_t *data, size_t datalen,
1892 bool fpm, bool force_nhg)
7cdb1a84
MS
1893{
1894 int bytelen;
7cdb1a84
MS
1895 struct nexthop *nexthop = NULL;
1896 unsigned int nexthop_num;
7cdb1a84 1897 const char *routedesc;
762288f5 1898 bool setsrc = false;
7cdb1a84
MS
1899 union g_addr src;
1900 const struct prefix *p, *src_p;
1901 uint32_t table_id;
d4000d7b 1902 struct nlsock *nl;
7cdb1a84
MS
1903
1904 struct {
1905 struct nlmsghdr n;
1906 struct rtmsg r;
e57a3fab
RZ
1907 char buf[];
1908 } *req = (void *)data;
7cdb1a84
MS
1909
1910 p = dplane_ctx_get_dest(ctx);
1911 src_p = dplane_ctx_get_src(ctx);
1912
0be6e7d7
JU
1913 if (datalen < sizeof(*req))
1914 return 0;
1915
d4000d7b
DS
1916 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
1917
e57a3fab 1918 memset(req, 0, sizeof(*req));
7cdb1a84 1919
b9c87515 1920 bytelen = (p->family == AF_INET ? 4 : 16);
7cdb1a84 1921
e57a3fab
RZ
1922 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1923 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
7cdb1a84 1924
334734a8
DS
1925 if ((cmd == RTM_NEWROUTE) &&
1926 ((p->family == AF_INET) || v6_rr_semantics))
e57a3fab 1927 req->n.nlmsg_flags |= NLM_F_REPLACE;
7cdb1a84 1928
e57a3fab 1929 req->n.nlmsg_type = cmd;
7cdb1a84 1930
d4000d7b 1931 req->n.nlmsg_pid = nl->snl.nl_pid;
7cdb1a84 1932
b9c87515 1933 req->r.rtm_family = p->family;
e57a3fab
RZ
1934 req->r.rtm_dst_len = p->prefixlen;
1935 req->r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1936 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
7cdb1a84 1937
5709131c 1938 if (cmd == RTM_DELROUTE)
e57a3fab 1939 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
5709131c 1940 else
e57a3fab 1941 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
7cdb1a84
MS
1942
1943 /*
1944 * blackhole routes are not RTN_UNICAST, they are
1945 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
1946 * so setting this value as a RTN_UNICAST would
1947 * cause the route lookup of just the prefix
1948 * to fail. So no need to specify this for
1949 * the RTM_DELROUTE case
1950 */
1951 if (cmd != RTM_DELROUTE)
e57a3fab 1952 req->r.rtm_type = RTN_UNICAST;
7cdb1a84 1953
0be6e7d7
JU
1954 if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen))
1955 return 0;
1956 if (src_p) {
1957 if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix,
1958 bytelen))
1959 return 0;
1960 }
7cdb1a84
MS
1961
1962 /* Metric. */
1963 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1964 * used
1965 * either by the kernel or by zebra. Its purely for calculating best
1966 * path(s)
1967 * by the routing protocol and for communicating with protocol peers.
1968 */
0be6e7d7
JU
1969 if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY,
1970 NL_DEFAULT_ROUTE_METRIC))
1971 return 0;
7cdb1a84
MS
1972
1973#if defined(SUPPORT_REALMS)
1974 {
1975 route_tag_t tag;
1976
5709131c 1977 if (cmd == RTM_DELROUTE)
7cdb1a84 1978 tag = dplane_ctx_get_old_tag(ctx);
5709131c 1979 else
7cdb1a84 1980 tag = dplane_ctx_get_tag(ctx);
7cdb1a84 1981
0be6e7d7
JU
1982 if (tag > 0 && tag <= 255) {
1983 if (!nl_attr_put32(&req->n, datalen, RTA_FLOW, tag))
1984 return 0;
1985 }
7cdb1a84
MS
1986 }
1987#endif
1988 /* Table corresponding to this route. */
1989 table_id = dplane_ctx_get_table(ctx);
1990 if (table_id < 256)
e57a3fab 1991 req->r.rtm_table = table_id;
7cdb1a84 1992 else {
e57a3fab 1993 req->r.rtm_table = RT_TABLE_UNSPEC;
0be6e7d7
JU
1994 if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id))
1995 return 0;
7cdb1a84
MS
1996 }
1997
9266b315
RZ
1998 if (IS_ZEBRA_DEBUG_KERNEL)
1999 zlog_debug(
2000 "%s: %s %pFX vrf %u(%u)", __func__,
2001 nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx),
2002 table_id);
7cdb1a84
MS
2003
2004 /*
2005 * If we are not updating the route and we have received
2006 * a route delete, then all we need to fill in is the
2007 * prefix information to tell the kernel to schwack
2008 * it.
2009 */
2010 if (cmd == RTM_DELROUTE)
0be6e7d7 2011 return NLMSG_ALIGN(req->n.nlmsg_len);
7cdb1a84
MS
2012
2013 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
312a6bee 2014 struct rtattr *nest;
7cdb1a84
MS
2015 uint32_t mtu = dplane_ctx_get_mtu(ctx);
2016 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
5709131c 2017
7cdb1a84
MS
2018 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
2019 mtu = nexthop_mtu;
312a6bee
JU
2020
2021 nest = nl_attr_nest(&req->n, datalen, RTA_METRICS);
0be6e7d7
JU
2022 if (nest == NULL)
2023 return 0;
2024
2025 if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu)))
2026 return 0;
312a6bee 2027 nl_attr_nest_end(&req->n, nest);
7cdb1a84
MS
2028 }
2029
4be03ff4
IR
2030 /*
2031 * Always install blackhole routes without using nexthops, because of
2032 * the following kernel problems:
2033 * 1. Kernel nexthops don't support unreachable/prohibit route types.
2034 * 2. Blackhole kernel nexthops are deleted when loopback is down.
2035 */
2036 nexthop = dplane_ctx_get_ng(ctx)->nexthop;
2037 if (nexthop) {
2038 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2039 nexthop = nexthop->resolved;
2040
2041 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
2042 switch (nexthop->bh_type) {
2043 case BLACKHOLE_ADMINPROHIB:
2044 req->r.rtm_type = RTN_PROHIBIT;
2045 break;
2046 case BLACKHOLE_REJECT:
2047 req->r.rtm_type = RTN_UNREACHABLE;
2048 break;
2049 default:
2050 req->r.rtm_type = RTN_BLACKHOLE;
2051 break;
2052 }
2053 return NLMSG_ALIGN(req->n.nlmsg_len);
2054 }
2055 }
2056
6c67f41f
SW
2057 if ((!fpm && kernel_nexthops_supported()
2058 && (!proto_nexthops_only()
2059 || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0)))
2060 || (fpm && force_nhg)) {
d8bfd8dc 2061 /* Kernel supports nexthop objects */
9a0132a5 2062 if (IS_ZEBRA_DEBUG_KERNEL)
0be6e7d7
JU
2063 zlog_debug("%s: %pFX nhg_id is %u", __func__, p,
2064 dplane_ctx_get_nhe_id(ctx));
e57a3fab 2065
0be6e7d7
JU
2066 if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID,
2067 dplane_ctx_get_nhe_id(ctx)))
2068 return 0;
d8bfd8dc
SW
2069
2070 /* Have to determine src still */
2071 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2072 if (setsrc)
2073 break;
2074
e57a3fab 2075 setsrc = nexthop_set_src(nexthop, p->family, &src);
d8bfd8dc
SW
2076 }
2077
2078 if (setsrc) {
0be6e7d7
JU
2079 if (p->family == AF_INET) {
2080 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2081 &src.ipv4, bytelen))
2082 return 0;
2083 } else if (p->family == AF_INET6) {
2084 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2085 &src.ipv6, bytelen))
2086 return 0;
2087 }
d8bfd8dc 2088 }
f78fe8f3 2089
0be6e7d7 2090 return NLMSG_ALIGN(req->n.nlmsg_len);
de3f5488
SW
2091 }
2092
7cdb1a84 2093 /* Count overall nexthops so we can decide whether to use singlepath
5709131c
MS
2094 * or multipath case.
2095 */
7cdb1a84
MS
2096 nexthop_num = 0;
2097 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2098 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2099 continue;
b9c87515 2100 if (!NEXTHOP_IS_ACTIVE(nexthop->flags))
7cdb1a84
MS
2101 continue;
2102
2103 nexthop_num++;
2104 }
2105
2106 /* Singlepath case. */
220f0f42 2107 if (nexthop_num == 1) {
7cdb1a84
MS
2108 nexthop_num = 0;
2109 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
7cdb1a84
MS
2110 if (CHECK_FLAG(nexthop->flags,
2111 NEXTHOP_FLAG_RECURSIVE)) {
5709131c
MS
2112
2113 if (setsrc)
2114 continue;
2115
b9c87515
RZ
2116 setsrc = nexthop_set_src(nexthop, p->family,
2117 &src);
f183e380 2118 continue;
7cdb1a84
MS
2119 }
2120
b9c87515 2121 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
7cdb1a84
MS
2122 routedesc = nexthop->rparent
2123 ? "recursive, single-path"
2124 : "single-path";
2125
0be6e7d7
JU
2126 if (!_netlink_route_build_singlepath(
2127 p, routedesc, bytelen, nexthop,
2128 &req->n, &req->r, datalen, cmd))
2129 return 0;
7cdb1a84
MS
2130 nexthop_num++;
2131 break;
2132 }
f2a0ba3a
RZ
2133
2134 /*
2135 * Add encapsulation information when installing via
2136 * FPM.
2137 */
0be6e7d7
JU
2138 if (fpm) {
2139 if (!netlink_route_nexthop_encap(
2140 &req->n, datalen, nexthop))
2141 return 0;
2142 }
7cdb1a84 2143 }
f2a0ba3a 2144
13e0321a 2145 if (setsrc) {
0be6e7d7
JU
2146 if (p->family == AF_INET) {
2147 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2148 &src.ipv4, bytelen))
2149 return 0;
2150 } else if (p->family == AF_INET6) {
2151 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2152 &src.ipv6, bytelen))
2153 return 0;
2154 }
7cdb1a84
MS
2155 }
2156 } else { /* Multipath case */
312a6bee 2157 struct rtattr *nest;
81793ac1 2158 const union g_addr *src1 = NULL;
7cdb1a84 2159
312a6bee 2160 nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH);
0be6e7d7
JU
2161 if (nest == NULL)
2162 return 0;
7cdb1a84
MS
2163
2164 nexthop_num = 0;
2165 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
7cdb1a84
MS
2166 if (CHECK_FLAG(nexthop->flags,
2167 NEXTHOP_FLAG_RECURSIVE)) {
2168 /* This only works for IPv4 now */
5709131c
MS
2169 if (setsrc)
2170 continue;
2171
b9c87515
RZ
2172 setsrc = nexthop_set_src(nexthop, p->family,
2173 &src);
78e54ded 2174 continue;
7cdb1a84
MS
2175 }
2176
b9c87515 2177 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
7cdb1a84
MS
2178 routedesc = nexthop->rparent
2179 ? "recursive, multipath"
2180 : "multipath";
2181 nexthop_num++;
2182
0be6e7d7
JU
2183 if (!_netlink_route_build_multipath(
2184 p, routedesc, bytelen, nexthop,
2185 &req->n, datalen, &req->r, &src1))
2186 return 0;
7cdb1a84
MS
2187
2188 if (!setsrc && src1) {
b9c87515 2189 if (p->family == AF_INET)
7cdb1a84 2190 src.ipv4 = src1->ipv4;
b9c87515 2191 else if (p->family == AF_INET6)
7cdb1a84
MS
2192 src.ipv6 = src1->ipv6;
2193
2194 setsrc = 1;
2195 }
2196 }
312a6bee 2197 }
0be6e7d7 2198
312a6bee
JU
2199 nl_attr_nest_end(&req->n, nest);
2200
2201 /*
2202 * Add encapsulation information when installing via
2203 * FPM.
2204 */
2205 if (fpm) {
2206 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
2207 nexthop)) {
2208 if (CHECK_FLAG(nexthop->flags,
2209 NEXTHOP_FLAG_RECURSIVE))
2210 continue;
0be6e7d7
JU
2211 if (!netlink_route_nexthop_encap(
2212 &req->n, datalen, nexthop))
2213 return 0;
312a6bee 2214 }
7cdb1a84 2215 }
f2a0ba3a 2216
312a6bee 2217
13e0321a 2218 if (setsrc) {
0be6e7d7
JU
2219 if (p->family == AF_INET) {
2220 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2221 &src.ipv4, bytelen))
2222 return 0;
2223 } else if (p->family == AF_INET6) {
2224 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2225 &src.ipv6, bytelen))
2226 return 0;
2227 }
7cdb1a84
MS
2228 if (IS_ZEBRA_DEBUG_KERNEL)
2229 zlog_debug("Setting source");
2230 }
7cdb1a84
MS
2231 }
2232
2233 /* If there is no useful nexthop then return. */
2234 if (nexthop_num == 0) {
2235 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315 2236 zlog_debug("%s: No useful nexthop.", __func__);
7cdb1a84
MS
2237 }
2238
312a6bee 2239 return NLMSG_ALIGN(req->n.nlmsg_len);
7cdb1a84
MS
2240}
2241
43b5cc5e 2242int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
e3be0432 2243{
5523c156 2244 uint32_t actual_table;
d62a17ae 2245 int suc = 0;
2246 struct mcast_route_data *mr = (struct mcast_route_data *)in;
bd8b9272
DS
2247 struct {
2248 struct nlmsghdr n;
2249 struct ndmsg ndm;
2250 char buf[256];
2251 } req;
e3be0432 2252
d62a17ae 2253 mroute = mr;
5895d33f 2254 struct zebra_ns *zns;
bd8b9272 2255
009f8ad5 2256 zns = zvrf->zns;
5605ecfc 2257 memset(&req, 0, sizeof(req));
bd8b9272
DS
2258
2259 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2260 req.n.nlmsg_flags = NLM_F_REQUEST;
2261 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2262
2263 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
2264 req.n.nlmsg_type = RTM_GETROUTE;
2265
a757997c
JU
2266 nl_attr_put32(&req.n, sizeof(req), RTA_IIF, mroute->ifindex);
2267 nl_attr_put32(&req.n, sizeof(req), RTA_OIF, mroute->ifindex);
2268 nl_attr_put32(&req.n, sizeof(req), RTA_SRC, mroute->sg.src.s_addr);
2269 nl_attr_put32(&req.n, sizeof(req), RTA_DST, mroute->sg.grp.s_addr);
5523c156
DS
2270 /*
2271 * What?
2272 *
2273 * So during the namespace cleanup we started storing
2274 * the zvrf table_id for the default table as RT_TABLE_MAIN
2275 * which is what the normal routing table for ip routing is.
2276 * This change caused this to break our lookups of sg data
2277 * because prior to this change the zvrf->table_id was 0
2278 * and when the pim multicast kernel code saw a 0,
2279 * it was auto-translated to RT_TABLE_DEFAULT. But since
2280 * we are now passing in RT_TABLE_MAIN there is no auto-translation
2281 * and the kernel goes screw you and the delicious cookies you
2282 * are trying to give me. So now we have this little hack.
2283 */
2284 actual_table = (zvrf->table_id == RT_TABLE_MAIN) ? RT_TABLE_DEFAULT :
2285 zvrf->table_id;
a757997c 2286 nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table);
e3be0432 2287
bd8b9272 2288 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
9bfadae8 2289 &zns->netlink_cmd, zns, false);
e3be0432 2290
bd8b9272 2291 mroute = NULL;
d62a17ae 2292 return suc;
e3be0432
DS
2293}
2294
8d03bc50
SW
2295/* Char length to debug ID with */
2296#define ID_LENGTH 10
2297
0be6e7d7 2298static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
8d03bc50 2299 uint32_t id,
e22e8001 2300 const struct nh_grp *z_grp,
0c8215cb 2301 const uint8_t count)
565ce0d3 2302{
565ce0d3 2303 struct nexthop_grp grp[count];
8d03bc50
SW
2304 /* Need space for max group size, "/", and null term */
2305 char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
2306 char buf1[ID_LENGTH + 2];
2307
2308 buf[0] = '\0';
565ce0d3
SW
2309
2310 memset(grp, 0, sizeof(grp));
2311
2312 if (count) {
0c8215cb 2313 for (int i = 0; i < count; i++) {
e22e8001 2314 grp[i].id = z_grp[i].id;
df7fb580 2315 grp[i].weight = z_grp[i].weight - 1;
8d03bc50
SW
2316
2317 if (IS_ZEBRA_DEBUG_KERNEL) {
2318 if (i == 0)
2319 snprintf(buf, sizeof(buf1), "group %u",
2320 grp[i].id);
2321 else {
2322 snprintf(buf1, sizeof(buf1), "/%u",
2323 grp[i].id);
2324 strlcat(buf, buf1, sizeof(buf));
2325 }
2326 }
565ce0d3 2327 }
0be6e7d7
JU
2328 if (!nl_attr_put(n, req_size, NHA_GROUP, grp,
2329 count * sizeof(*grp)))
2330 return false;
565ce0d3 2331 }
8d03bc50
SW
2332
2333 if (IS_ZEBRA_DEBUG_KERNEL)
2334 zlog_debug("%s: ID (%u): %s", __func__, id, buf);
0be6e7d7
JU
2335
2336 return true;
565ce0d3
SW
2337}
2338
f820d025 2339/**
e9a1cd93 2340 * Next hop packet encoding helper function.
f820d025 2341 *
e9a1cd93
RZ
2342 * \param[in] cmd netlink command.
2343 * \param[in] ctx dataplane context (information snapshot).
2344 * \param[out] buf buffer to hold the packet.
2345 * \param[in] buflen amount of buffer bytes.
f820d025 2346 *
0be6e7d7
JU
2347 * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
2348 * otherwise the number of bytes written to buf.
f820d025 2349 */
0be6e7d7
JU
2350ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
2351 const struct zebra_dplane_ctx *ctx,
2352 void *buf, size_t buflen)
f820d025 2353{
f820d025
SW
2354 struct {
2355 struct nlmsghdr n;
2356 struct nhmsg nhm;
e9a1cd93
RZ
2357 char buf[];
2358 } *req = buf;
f820d025 2359
8d03bc50
SW
2360 mpls_lse_t out_lse[MPLS_MAX_LABELS];
2361 char label_buf[256];
2362 int num_labels = 0;
72938edf
SW
2363 uint32_t id = dplane_ctx_get_nhe_id(ctx);
2364 int type = dplane_ctx_get_nhe_type(ctx);
bdd085a8
MS
2365 struct rtattr *nest;
2366 uint16_t encap;
d4000d7b
DS
2367 struct nlsock *nl =
2368 kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
72938edf
SW
2369
2370 if (!id) {
2371 flog_err(
2372 EC_ZEBRA_NHG_FIB_UPDATE,
2373 "Failed trying to update a nexthop group in the kernel that does not have an ID");
2374 return -1;
2375 }
81505946 2376
6c67f41f
SW
2377 /*
2378 * Nothing to do if the kernel doesn't support nexthop objects or
2379 * we dont want to install this type of NHG
2380 */
72938edf
SW
2381 if (!kernel_nexthops_supported()) {
2382 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2383 zlog_debug(
2384 "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring",
2385 __func__, id, zebra_route_string(type));
6c67f41f 2386 return 0;
72938edf
SW
2387 }
2388
2389 if (proto_nexthops_only() && !is_proto_nhg(id, type)) {
2390 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2391 zlog_debug(
2392 "%s: nhg_id %u (%s): proto-based nexthops only, ignoring",
2393 __func__, id, zebra_route_string(type));
2394 return 0;
2395 }
6c67f41f 2396
8d03bc50
SW
2397 label_buf[0] = '\0';
2398
0be6e7d7
JU
2399 if (buflen < sizeof(*req))
2400 return 0;
2401
2402 memset(req, 0, sizeof(*req));
f820d025 2403
e9a1cd93
RZ
2404 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2405 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
9a1588c4
SW
2406
2407 if (cmd == RTM_NEWNEXTHOP)
e9a1cd93 2408 req->n.nlmsg_flags |= NLM_F_REPLACE;
9a1588c4 2409
e9a1cd93 2410 req->n.nlmsg_type = cmd;
d4000d7b 2411 req->n.nlmsg_pid = nl->snl.nl_pid;
f820d025 2412
e9a1cd93 2413 req->nhm.nh_family = AF_UNSPEC;
fec211ad 2414 /* TODO: Scope? */
f820d025 2415
0be6e7d7
JU
2416 if (!nl_attr_put32(&req->n, buflen, NHA_ID, id))
2417 return 0;
f820d025
SW
2418
2419 if (cmd == RTM_NEWNEXTHOP) {
bf1626a6
MS
2420 /*
2421 * We distinguish between a "group", which is a collection
2422 * of ids, and a singleton nexthop with an id. The
2423 * group is installed as an id that just refers to a list of
2424 * other ids.
2425 */
0be6e7d7 2426 if (dplane_ctx_get_nhe_nh_grp_count(ctx)) {
d52c949b 2427 if (!_netlink_nexthop_build_group(
0be6e7d7
JU
2428 &req->n, buflen, id,
2429 dplane_ctx_get_nhe_nh_grp(ctx),
2430 dplane_ctx_get_nhe_nh_grp_count(ctx)))
2431 return 0;
2432 } else {
0c8215cb
SW
2433 const struct nexthop *nh =
2434 dplane_ctx_get_nhe_ng(ctx)->nexthop;
2435 afi_t afi = dplane_ctx_get_nhe_afi(ctx);
e8b0e420 2436
0c8215cb 2437 if (afi == AFI_IP)
e9a1cd93 2438 req->nhm.nh_family = AF_INET;
0c8215cb 2439 else if (afi == AFI_IP6)
e9a1cd93 2440 req->nhm.nh_family = AF_INET6;
f820d025 2441
565ce0d3 2442 switch (nh->type) {
a6e6a6d8 2443 case NEXTHOP_TYPE_IPV4:
565ce0d3 2444 case NEXTHOP_TYPE_IPV4_IFINDEX:
0be6e7d7
JU
2445 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2446 &nh->gate.ipv4,
2447 IPV4_MAX_BYTELEN))
2448 return 0;
565ce0d3 2449 break;
a6e6a6d8 2450 case NEXTHOP_TYPE_IPV6:
565ce0d3 2451 case NEXTHOP_TYPE_IPV6_IFINDEX:
0be6e7d7
JU
2452 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2453 &nh->gate.ipv6,
2454 IPV6_MAX_BYTELEN))
2455 return 0;
565ce0d3
SW
2456 break;
2457 case NEXTHOP_TYPE_BLACKHOLE:
0be6e7d7
JU
2458 if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE,
2459 NULL, 0))
2460 return 0;
8d03bc50
SW
2461 /* Blackhole shouldn't have anymore attributes
2462 */
2463 goto nexthop_done;
565ce0d3
SW
2464 case NEXTHOP_TYPE_IFINDEX:
2465 /* Don't need anymore info for this */
2466 break;
a6e6a6d8
SW
2467 }
2468
2469 if (!nh->ifindex) {
565ce0d3
SW
2470 flog_err(
2471 EC_ZEBRA_NHG_FIB_UPDATE,
2472 "Context received for kernel nexthop update without an interface");
2473 return -1;
565ce0d3
SW
2474 }
2475
0be6e7d7
JU
2476 if (!nl_attr_put32(&req->n, buflen, NHA_OIF,
2477 nh->ifindex))
2478 return 0;
8d03bc50 2479
62d2ecb2 2480 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK))
e9a1cd93 2481 req->nhm.nh_flags |= RTNH_F_ONLINK;
62d2ecb2 2482
8d03bc50
SW
2483 num_labels =
2484 build_label_stack(nh->nh_label, out_lse,
2485 label_buf, sizeof(label_buf));
2486
2487 if (num_labels) {
2488 /* Set the BoS bit */
2489 out_lse[num_labels - 1] |=
2490 htonl(1 << MPLS_LS_S_SHIFT);
2491
2492 /*
2493 * TODO: MPLS unsupported for now in kernel.
2494 */
e9a1cd93 2495 if (req->nhm.nh_family == AF_MPLS)
8d03bc50 2496 goto nexthop_done;
bdd085a8
MS
2497
2498 encap = LWTUNNEL_ENCAP_MPLS;
2499 if (!nl_attr_put16(&req->n, buflen,
2500 NHA_ENCAP_TYPE, encap))
2501 return 0;
2502 nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP);
2503 if (!nest)
2504 return 0;
2505 if (!nl_attr_put(
2506 &req->n, buflen, MPLS_IPTUNNEL_DST,
2507 &out_lse,
2508 num_labels * sizeof(mpls_lse_t)))
2509 return 0;
2510
2511 nl_attr_nest_end(&req->n, nest);
8d03bc50
SW
2512 }
2513
eab0f8f0
HS
2514 if (nh->nh_srv6) {
2515 if (nh->nh_srv6->seg6local_action !=
2516 ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
2517 uint32_t action;
2518 uint16_t encap;
2519 struct rtattr *nest;
2520 const struct seg6local_context *ctx;
2521
2522 req->nhm.nh_family = AF_INET6;
2523 action = nh->nh_srv6->seg6local_action;
2524 ctx = &nh->nh_srv6->seg6local_ctx;
2525 encap = LWTUNNEL_ENCAP_SEG6_LOCAL;
2526 if (!nl_attr_put(&req->n, buflen,
2527 NHA_ENCAP_TYPE,
2528 &encap,
2529 sizeof(uint16_t)))
2530 return 0;
8689b25a 2531
eab0f8f0
HS
2532 nest = nl_attr_nest(&req->n, buflen,
2533 NHA_ENCAP | NLA_F_NESTED);
2534 if (!nest)
2535 return 0;
52026569 2536
eab0f8f0
HS
2537 switch (action) {
2538 case SEG6_LOCAL_ACTION_END:
2539 if (!nl_attr_put32(
2540 &req->n, buflen,
52026569
HS
2541 SEG6_LOCAL_ACTION,
2542 SEG6_LOCAL_ACTION_END))
eab0f8f0
HS
2543 return 0;
2544 break;
2545 case SEG6_LOCAL_ACTION_END_X:
2546 if (!nl_attr_put32(
2547 &req->n, buflen,
52026569
HS
2548 SEG6_LOCAL_ACTION,
2549 SEG6_LOCAL_ACTION_END_X))
eab0f8f0
HS
2550 return 0;
2551 if (!nl_attr_put(
2552 &req->n, buflen,
8689b25a 2553 SEG6_LOCAL_NH6, &ctx->nh6,
52026569 2554 sizeof(struct in6_addr)))
eab0f8f0
HS
2555 return 0;
2556 break;
2557 case SEG6_LOCAL_ACTION_END_T:
2558 if (!nl_attr_put32(
2559 &req->n, buflen,
52026569
HS
2560 SEG6_LOCAL_ACTION,
2561 SEG6_LOCAL_ACTION_END_T))
eab0f8f0
HS
2562 return 0;
2563 if (!nl_attr_put32(
2564 &req->n, buflen,
52026569
HS
2565 SEG6_LOCAL_TABLE,
2566 ctx->table))
eab0f8f0
HS
2567 return 0;
2568 break;
2569 case SEG6_LOCAL_ACTION_END_DX4:
2570 if (!nl_attr_put32(
2571 &req->n, buflen,
52026569
HS
2572 SEG6_LOCAL_ACTION,
2573 SEG6_LOCAL_ACTION_END_DX4))
eab0f8f0
HS
2574 return 0;
2575 if (!nl_attr_put(
2576 &req->n, buflen,
8689b25a 2577 SEG6_LOCAL_NH4, &ctx->nh4,
52026569 2578 sizeof(struct in_addr)))
eab0f8f0
HS
2579 return 0;
2580 break;
2581 case SEG6_LOCAL_ACTION_END_DT6:
2582 if (!nl_attr_put32(
2583 &req->n, buflen,
52026569
HS
2584 SEG6_LOCAL_ACTION,
2585 SEG6_LOCAL_ACTION_END_DT6))
eab0f8f0
HS
2586 return 0;
2587 if (!nl_attr_put32(
2588 &req->n, buflen,
52026569
HS
2589 SEG6_LOCAL_TABLE,
2590 ctx->table))
eab0f8f0
HS
2591 return 0;
2592 break;
7eab60a7
RS
2593 case SEG6_LOCAL_ACTION_END_DT4:
2594 if (!nl_attr_put32(
2595 &req->n, buflen,
2596 SEG6_LOCAL_ACTION,
2597 SEG6_LOCAL_ACTION_END_DT4))
2598 return 0;
2599 if (!nl_attr_put32(
2600 &req->n, buflen,
2601 SEG6_LOCAL_VRFTABLE,
2602 ctx->table))
2603 return 0;
2604 break;
eab0f8f0
HS
2605 default:
2606 zlog_err("%s: unsupport seg6local behaviour action=%u",
2607 __func__, action);
0a543b79 2608 return 0;
eab0f8f0
HS
2609 }
2610 nl_attr_nest_end(&req->n, nest);
8689b25a 2611 }
8689b25a 2612
eab0f8f0
HS
2613 if (!sid_zero(&nh->nh_srv6->seg6_segs)) {
2614 char tun_buf[4096];
2615 ssize_t tun_len;
2616 struct rtattr *nest;
76fb7ae4 2617
eab0f8f0
HS
2618 if (!nl_attr_put16(&req->n, buflen,
2619 NHA_ENCAP_TYPE,
2620 LWTUNNEL_ENCAP_SEG6))
2621 return 0;
2622 nest = nl_attr_nest(&req->n, buflen,
2623 NHA_ENCAP | NLA_F_NESTED);
2624 if (!nest)
2625 return 0;
2626 tun_len = fill_seg6ipt_encap(tun_buf,
2627 sizeof(tun_buf),
2628 &nh->nh_srv6->seg6_segs);
2629 if (tun_len < 0)
2630 return 0;
2631 if (!nl_attr_put(&req->n, buflen,
2632 SEG6_IPTUNNEL_SRH,
2633 tun_buf, tun_len))
2634 return 0;
2635 nl_attr_nest_end(&req->n, nest);
2636 }
76fb7ae4
HS
2637 }
2638
bf1626a6
MS
2639nexthop_done:
2640
2641 if (IS_ZEBRA_DEBUG_KERNEL)
2c77ddee
DS
2642 zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ",
2643 __func__, id, nh, nh->ifindex,
bd47f3a3
JU
2644 vrf_id_to_name(nh->vrf_id),
2645 nh->vrf_id, label_buf);
bdd085a8 2646 }
f820d025 2647
bdd085a8 2648 req->nhm.nh_protocol = zebra2proto(type);
f820d025 2649
f820d025
SW
2650 } else if (cmd != RTM_DELNEXTHOP) {
2651 flog_err(
2652 EC_ZEBRA_NHG_FIB_UPDATE,
2653 "Nexthop group kernel update command (%d) does not exist",
2654 cmd);
2655 return -1;
2656 }
2657
9266b315
RZ
2658 if (IS_ZEBRA_DEBUG_KERNEL)
2659 zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd),
2660 id);
f820d025 2661
e9a1cd93 2662 return NLMSG_ALIGN(req->n.nlmsg_len);
f820d025
SW
2663}
2664
67e3369e
JU
2665static ssize_t netlink_nexthop_msg_encoder(struct zebra_dplane_ctx *ctx,
2666 void *buf, size_t buflen)
f820d025 2667{
bf1626a6 2668 enum dplane_op_e op;
98cda54a 2669 int cmd = 0;
f820d025 2670
bf1626a6
MS
2671 op = dplane_ctx_get_op(ctx);
2672 if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE)
f820d025 2673 cmd = RTM_NEWNEXTHOP;
bf1626a6
MS
2674 else if (op == DPLANE_OP_NH_DELETE)
2675 cmd = RTM_DELNEXTHOP;
2676 else {
2677 flog_err(EC_ZEBRA_NHG_FIB_UPDATE,
2678 "Context received for kernel nexthop update with incorrect OP code (%u)",
2679 op);
67e3369e 2680 return -1;
f820d025
SW
2681 }
2682
67e3369e
JU
2683 return netlink_nexthop_msg_encode(cmd, ctx, buf, buflen);
2684}
2685
67e3369e
JU
2686enum netlink_msg_status
2687netlink_put_nexthop_update_msg(struct nl_batch *bth,
2688 struct zebra_dplane_ctx *ctx)
2689{
e9a1cd93
RZ
2690 /* Nothing to do if the kernel doesn't support nexthop objects */
2691 if (!kernel_nexthops_supported())
67e3369e 2692 return FRR_NETLINK_SUCCESS;
e9a1cd93 2693
67e3369e
JU
2694 return netlink_batch_add_msg(bth, ctx, netlink_nexthop_msg_encoder,
2695 false);
2696}
f820d025 2697
67e3369e
JU
2698static ssize_t netlink_newroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2699 void *buf, size_t buflen)
2700{
2701 return netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, buf,
2702 buflen, false, false);
f820d025
SW
2703}
2704
67e3369e
JU
2705static ssize_t netlink_delroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2706 void *buf, size_t buflen)
2707{
2708 return netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, buf,
2709 buflen, false, false);
2710}
2711
2712enum netlink_msg_status
2713netlink_put_route_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
7cdb1a84 2714{
67e3369e 2715 int cmd;
7cdb1a84
MS
2716 const struct prefix *p = dplane_ctx_get_dest(ctx);
2717
2718 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
2719 cmd = RTM_DELROUTE;
2720 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
2721 cmd = RTM_NEWROUTE;
2722 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
2723
2724 if (p->family == AF_INET || v6_rr_semantics) {
2725 /* Single 'replace' operation */
fe5f21af
DS
2726
2727 /*
2728 * With route replace semantics in place
2729 * for v4 routes and the new route is a system
2730 * route we do not install anything.
2731 * The problem here is that the new system
2732 * route should cause us to withdraw from
2733 * the kernel the old non-system route
2734 */
67e3369e
JU
2735 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))
2736 && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
6b390b3c 2737 return netlink_batch_add_msg(
67e3369e
JU
2738 bth, ctx, netlink_delroute_msg_encoder,
2739 true);
7cdb1a84
MS
2740 } else {
2741 /*
2742 * So v6 route replace semantics are not in
2743 * the kernel at this point as I understand it.
2744 * so let's do a delete then an add.
2745 * In the future once v6 route replace semantics
2746 * are in we can figure out what to do here to
2747 * allow working with old and new kernels.
2748 *
2749 * I'm also intentionally ignoring the failure case
2750 * of the route delete. If that happens yeah we're
2751 * screwed.
2752 */
67e3369e
JU
2753 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2754 netlink_batch_add_msg(
2755 bth, ctx, netlink_delroute_msg_encoder,
2756 true);
7cdb1a84
MS
2757 }
2758
67e3369e
JU
2759 cmd = RTM_NEWROUTE;
2760 } else
2761 return FRR_NETLINK_ERROR;
7cdb1a84 2762
67e3369e
JU
2763 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
2764 return FRR_NETLINK_SUCCESS;
0be6e7d7 2765
67e3369e
JU
2766 return netlink_batch_add_msg(bth, ctx,
2767 cmd == RTM_NEWROUTE
2768 ? netlink_newroute_msg_encoder
2769 : netlink_delroute_msg_encoder,
2770 false);
2771}
7cdb1a84 2772
d9f5b2f5
SW
2773/**
2774 * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop
2775 *
2776 * @tb: Netlink RTA data
2777 * @family: Address family in the nhmsg
8c0a24c1 2778 * @ifp: Interface connected - this should be NULL, we fill it in
d9f5b2f5
SW
2779 * @ns_id: Namspace id
2780 *
2781 * Return: New nexthop
2782 */
e22e8001
SW
2783static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
2784 unsigned char family,
2785 struct interface **ifp,
2786 ns_id_t ns_id)
d9f5b2f5 2787{
e22e8001 2788 struct nexthop nh = {};
d9f5b2f5 2789 void *gate = NULL;
8e401b25 2790 enum nexthop_types_t type = 0;
e22e8001
SW
2791 int if_index = 0;
2792 size_t sz = 0;
7134ba70 2793 struct interface *ifp_lookup;
d9f5b2f5
SW
2794
2795 if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
2796
8e401b25 2797
d9f5b2f5
SW
2798 if (tb[NHA_GATEWAY]) {
2799 switch (family) {
2800 case AF_INET:
8e401b25 2801 type = NEXTHOP_TYPE_IPV4_IFINDEX;
d9f5b2f5
SW
2802 sz = 4;
2803 break;
2804 case AF_INET6:
8e401b25 2805 type = NEXTHOP_TYPE_IPV6_IFINDEX;
d9f5b2f5
SW
2806 sz = 16;
2807 break;
2808 default:
2809 flog_warn(
2810 EC_ZEBRA_BAD_NHG_MESSAGE,
c4239c05 2811 "Nexthop gateway with bad address family (%d) received from kernel",
d9f5b2f5 2812 family);
e22e8001 2813 return nh;
d9f5b2f5
SW
2814 }
2815 gate = RTA_DATA(tb[NHA_GATEWAY]);
e22e8001 2816 } else
8e401b25 2817 type = NEXTHOP_TYPE_IFINDEX;
d9f5b2f5 2818
8e401b25 2819 if (type)
e22e8001 2820 nh.type = type;
8e401b25
SW
2821
2822 if (gate)
e22e8001 2823 memcpy(&(nh.gate), gate, sz);
8e401b25
SW
2824
2825 if (if_index)
e22e8001 2826 nh.ifindex = if_index;
8e401b25 2827
7134ba70
DS
2828 ifp_lookup =
2829 if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
2830
e22e8001 2831 if (ifp)
7134ba70
DS
2832 *ifp = ifp_lookup;
2833 if (ifp_lookup)
096f7609 2834 nh.vrf_id = ifp_lookup->vrf->vrf_id;
e22e8001 2835 else {
d9f5b2f5
SW
2836 flog_warn(
2837 EC_ZEBRA_UNKNOWN_INTERFACE,
2838 "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
15569c58 2839 __func__, nh.ifindex);
d9f5b2f5 2840
e22e8001 2841 nh.vrf_id = VRF_DEFAULT;
d9f5b2f5
SW
2842 }
2843
2844 if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
2845 uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
2846 int num_labels = 0;
6e728764 2847
d9f5b2f5
SW
2848 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
2849
e22e8001 2850 if (encap_type == LWTUNNEL_ENCAP_MPLS)
d9f5b2f5 2851 num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
d9f5b2f5 2852
e22e8001
SW
2853 if (num_labels)
2854 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
d9f5b2f5 2855 labels);
d9f5b2f5
SW
2856 }
2857
2858 return nh;
2859}
2860
85f5e761 2861static int netlink_nexthop_process_group(struct rtattr **tb,
5a935f79 2862 struct nh_grp *z_grp, int z_grp_size)
d9f5b2f5 2863{
e22e8001
SW
2864 uint8_t count = 0;
2865 /* linux/nexthop.h group struct */
d9f5b2f5
SW
2866 struct nexthop_grp *n_grp = NULL;
2867
85f5e761 2868 n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
d9f5b2f5
SW
2869 count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
2870
2871 if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
2872 flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
2873 "Invalid nexthop group received from the kernel");
85f5e761 2874 return count;
d9f5b2f5
SW
2875 }
2876
5a935f79 2877 for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
e22e8001 2878 z_grp[i].id = n_grp[i].id;
df7fb580 2879 z_grp[i].weight = n_grp[i].weight + 1;
85f5e761 2880 }
d9f5b2f5
SW
2881 return count;
2882}
2883
2884/**
2885 * netlink_nexthop_change() - Read in change about nexthops from the kernel
2886 *
2887 * @h: Netlink message header
2888 * @ns_id: Namspace id
2889 * @startup: Are we reading under startup conditions?
2890 *
2891 * Return: Result status
2892 */
2893int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2894{
2895 int len;
2896 /* nexthop group id */
2897 uint32_t id;
2898 unsigned char family;
38e40db1 2899 int type;
e8b0e420 2900 afi_t afi = AFI_UNSPEC;
946de1b9 2901 vrf_id_t vrf_id = VRF_DEFAULT;
8c0a24c1 2902 struct interface *ifp = NULL;
d9f5b2f5 2903 struct nhmsg *nhm = NULL;
e22e8001
SW
2904 struct nexthop nh = {};
2905 struct nh_grp grp[MULTIPATH_NUM] = {};
85f5e761 2906 /* Count of nexthops in group array */
e22e8001 2907 uint8_t grp_count = 0;
e22e8001 2908 struct rtattr *tb[NHA_MAX + 1] = {};
d9f5b2f5 2909
1d80c209
DS
2910 frrtrace(3, frr_zebra, netlink_nexthop_change, h, ns_id, startup);
2911
d9f5b2f5
SW
2912 nhm = NLMSG_DATA(h);
2913
88cafda7
DS
2914 if (ns_id)
2915 vrf_id = ns_id;
2916
d9f5b2f5
SW
2917 if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
2918 return 0;
2919
2920 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
2921 if (len < 0) {
2922 zlog_warn(
2923 "%s: Message received from netlink is of a broken size %d %zu",
15569c58 2924 __func__, h->nlmsg_len,
d9f5b2f5
SW
2925 (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
2926 return -1;
2927 }
2928
6e1e2e8d
DS
2929 netlink_parse_rtattr_flags(tb, NHA_MAX, RTM_NHA(nhm), len,
2930 NLA_F_NESTED);
d9f5b2f5
SW
2931
2932
2933 if (!tb[NHA_ID]) {
2934 flog_warn(
2935 EC_ZEBRA_BAD_NHG_MESSAGE,
2936 "Nexthop group without an ID received from the kernel");
2937 return -1;
2938 }
2939
2940 /* We use the ID key'd nhg table for kernel updates */
2941 id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
d9f5b2f5 2942
506efd37
AK
2943 if (zebra_evpn_mh_is_fdb_nh(id)) {
2944 /* If this is a L2 NH just ignore it */
2945 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
2946 zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x",
2947 h->nlmsg_type, id);
2948 }
2949 return 0;
2950 }
2951
e8b0e420 2952 family = nhm->nh_family;
e8b0e420
SW
2953 afi = family2afi(family);
2954
38e40db1
SW
2955 type = proto2zebra(nhm->nh_protocol, 0, true);
2956
fdee485a
SW
2957 if (IS_ZEBRA_DEBUG_KERNEL)
2958 zlog_debug("%s ID (%u) %s NS %u",
2959 nl_msg_type_to_str(h->nlmsg_type), id,
2960 nl_family_to_str(family), ns_id);
2961
2962
d9f5b2f5
SW
2963 if (h->nlmsg_type == RTM_NEWNEXTHOP) {
2964 if (tb[NHA_GROUP]) {
2965 /**
2966 * If this is a group message its only going to have
2967 * an array of nexthop IDs associated with it
2968 */
5a935f79
SW
2969 grp_count = netlink_nexthop_process_group(
2970 tb, grp, array_size(grp));
85f5e761
SW
2971 } else {
2972 if (tb[NHA_BLACKHOLE]) {
2973 /**
2974 * This nexthop is just for blackhole-ing
2975 * traffic, it should not have an OIF, GATEWAY,
2976 * or ENCAP
2977 */
e22e8001
SW
2978 nh.type = NEXTHOP_TYPE_BLACKHOLE;
2979 nh.bh_type = BLACKHOLE_UNSPEC;
2980 } else if (tb[NHA_OIF])
85f5e761
SW
2981 /**
2982 * This is a true new nexthop, so we need
2983 * to parse the gateway and device info
2984 */
2985 nh = netlink_nexthop_process_nh(tb, family,
2986 &ifp, ns_id);
e22e8001
SW
2987 else {
2988
8e401b25
SW
2989 flog_warn(
2990 EC_ZEBRA_BAD_NHG_MESSAGE,
2991 "Invalid Nexthop message received from the kernel with ID (%u)",
2992 id);
2993 return -1;
2994 }
e22e8001
SW
2995 SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
2996 if (nhm->nh_flags & RTNH_F_ONLINK)
2997 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
2998 vrf_id = nh.vrf_id;
d9f5b2f5
SW
2999 }
3000
38e40db1
SW
3001 if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
3002 type, startup))
e22e8001 3003 return -1;
8e401b25 3004
9a1588c4 3005 } else if (h->nlmsg_type == RTM_DELNEXTHOP)
88cafda7 3006 zebra_nhg_kernel_del(id, vrf_id);
d9f5b2f5 3007
d9f5b2f5
SW
3008 return 0;
3009}
3010
3011/**
3012 * netlink_request_nexthop() - Request nextop information from the kernel
3013 * @zns: Zebra namespace
3014 * @family: AF_* netlink family
3015 * @type: RTM_* route type
3016 *
3017 * Return: Result status
3018 */
3019static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
3020{
3021 struct {
3022 struct nlmsghdr n;
3023 struct nhmsg nhm;
3024 } req;
3025
3026 /* Form the request, specifying filter (rtattr) if needed. */
3027 memset(&req, 0, sizeof(req));
3028 req.n.nlmsg_type = type;
3029 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3030 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3031 req.nhm.nh_family = family;
3032
fd3f8e52 3033 return netlink_request(&zns->netlink_cmd, &req);
d9f5b2f5
SW
3034}
3035
7d5bb02b 3036
d9f5b2f5
SW
3037/**
3038 * netlink_nexthop_read() - Nexthop read function using netlink interface
3039 *
3040 * @zns: Zebra name space
3041 *
3042 * Return: Result status
3043 * Only called at bootstrap time.
3044 */
3045int netlink_nexthop_read(struct zebra_ns *zns)
3046{
3047 int ret;
3048 struct zebra_dplane_info dp_info;
3049
3050 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3051
3052 /* Get nexthop objects */
3053 ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
3054 if (ret < 0)
3055 return ret;
3056 ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
9bfadae8 3057 &dp_info, 0, true);
81505946
SW
3058
3059 if (!ret)
3060 /* If we succesfully read in nexthop objects,
3061 * this kernel must support them.
3062 */
3063 supports_nh = true;
7c99d51b
MS
3064 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
3065 zlog_debug("Nexthop objects %ssupported on this kernel",
3066 supports_nh ? "" : "not ");
090ee856
DS
3067
3068 zebra_router_set_supports_nhgs(supports_nh);
81505946 3069
60e0eaee 3070 return ret;
d9f5b2f5
SW
3071}
3072
3073
05657ec2
PG
3074int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen,
3075 ns_id_t ns_id, uint8_t family, bool permanent)
6b8a5694 3076{
d62a17ae 3077 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
05657ec2
PG
3078 addr, lla, llalen, ns_id, family, permanent,
3079 RTPROT_ZEBRA);
6b8a5694 3080}
718e3744 3081
340845e2 3082/**
0be6e7d7
JU
3083 * netlink_neigh_update_msg_encode() - Common helper api for encoding
3084 * evpn neighbor update as netlink messages using dataplane context object.
bbd4285b 3085 * Here, a neighbor refers to a bridge forwarding database entry for
3086 * either unicast forwarding or head-end replication or an IP neighbor
3087 * entry.
340845e2
JU
3088 * @ctx: Dataplane context
3089 * @cmd: Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH)
0a27a2fe
PG
3090 * @lla: A pointer to neighbor cache link layer address
3091 * @llalen: Length of the pointer to neighbor cache link layer
3092 * address
340845e2 3093 * @ip: A neighbor cache n/w layer destination address
bbd4285b 3094 * In the case of bridge FDB, this represnts the remote
3095 * VTEP IP.
340845e2
JU
3096 * @replace_obj: Whether NEW request should replace existing object or
3097 * add to the end of the list
3098 * @family: AF_* netlink family
3099 * @type: RTN_* route type
3100 * @flags: NTF_* flags
3101 * @state: NUD_* states
d4d4ec1c
RZ
3102 * @data: data buffer pointer
3103 * @datalen: total amount of data buffer space
0a27a2fe 3104 * @protocol: protocol information
340845e2 3105 *
0be6e7d7
JU
3106 * Return: 0 when the msg doesn't fit entirely in the buffer
3107 * otherwise the number of bytes written to buf.
13d60d35 3108 */
0be6e7d7 3109static ssize_t netlink_neigh_update_msg_encode(
0a27a2fe
PG
3110 const struct zebra_dplane_ctx *ctx, int cmd, const void *lla,
3111 int llalen, const struct ipaddr *ip, bool replace_obj, uint8_t family,
3112 uint8_t type, uint8_t flags, uint16_t state, uint32_t nhg_id, bool nfy,
ccd187cd 3113 uint8_t nfy_flags, bool ext, uint32_t ext_flags, void *data,
0a27a2fe 3114 size_t datalen, uint8_t protocol)
13d60d35 3115{
d62a17ae 3116 struct {
3117 struct nlmsghdr n;
3118 struct ndmsg ndm;
d4d4ec1c
RZ
3119 char buf[];
3120 } *req = data;
340845e2
JU
3121 int ipa_len;
3122 enum dplane_op_e op;
d62a17ae 3123
0be6e7d7
JU
3124 if (datalen < sizeof(*req))
3125 return 0;
45c80fbd 3126 memset(req, 0, sizeof(*req));
d62a17ae 3127
340845e2
JU
3128 op = dplane_ctx_get_op(ctx);
3129
d4d4ec1c
RZ
3130 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3131 req->n.nlmsg_flags = NLM_F_REQUEST;
d62a17ae 3132 if (cmd == RTM_NEWNEIGH)
d4d4ec1c 3133 req->n.nlmsg_flags |=
340845e2
JU
3134 NLM_F_CREATE
3135 | (replace_obj ? NLM_F_REPLACE : NLM_F_APPEND);
d4d4ec1c
RZ
3136 req->n.nlmsg_type = cmd;
3137 req->ndm.ndm_family = family;
3138 req->ndm.ndm_type = type;
3139 req->ndm.ndm_state = state;
3140 req->ndm.ndm_flags = flags;
3141 req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
d62a17ae 3142
45c80fbd 3143 if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol,
0be6e7d7
JU
3144 sizeof(protocol)))
3145 return 0;
3146
0a27a2fe
PG
3147 if (lla) {
3148 if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, lla, llalen))
0be6e7d7
JU
3149 return 0;
3150 }
13d60d35 3151
f188e68e 3152 if (nfy) {
4bcdb608
NA
3153 struct rtattr *nest;
3154
3155 nest = nl_attr_nest(&req->n, datalen,
3156 NDA_FDB_EXT_ATTRS | NLA_F_NESTED);
3157 if (!nest)
3158 return 0;
3159
3160 if (!nl_attr_put(&req->n, datalen, NFEA_ACTIVITY_NOTIFY,
3161 &nfy_flags, sizeof(nfy_flags)))
f188e68e 3162 return 0;
4bcdb608
NA
3163 if (!nl_attr_put(&req->n, datalen, NFEA_DONT_REFRESH, NULL, 0))
3164 return 0;
3165
3166 nl_attr_nest_end(&req->n, nest);
f188e68e 3167 }
506efd37 3168
4bcdb608 3169
ccd187cd
AK
3170 if (ext) {
3171 if (!nl_attr_put(&req->n, datalen, NDA_EXT_FLAGS, &ext_flags,
3172 sizeof(ext_flags)))
3173 return 0;
3174 }
3175
80e19eb7
AK
3176 if (nhg_id) {
3177 if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id))
3178 return 0;
3179 } else {
3180 ipa_len =
3181 IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
3182 if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr,
3183 ipa_len))
3184 return 0;
3185 }
340845e2
JU
3186
3187 if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) {
3188 vlanid_t vid = dplane_ctx_mac_get_vlan(ctx);
13d60d35 3189
0be6e7d7
JU
3190 if (vid > 0) {
3191 if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid))
3192 return 0;
3193 }
13d60d35 3194
0be6e7d7
JU
3195 if (!nl_attr_put32(&req->n, datalen, NDA_MASTER,
3196 dplane_ctx_mac_get_br_ifindex(ctx)))
3197 return 0;
340845e2 3198 }
13d60d35 3199
d4d4ec1c 3200 return NLMSG_ALIGN(req->n.nlmsg_len);
13d60d35 3201}
3202
340845e2
JU
3203/*
3204 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
3205 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
3206 */
67e3369e
JU
3207static ssize_t
3208netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd,
3209 void *buf, size_t buflen)
340845e2
JU
3210{
3211 struct ethaddr dst_mac = {.octet = {0}};
88217099
PG
3212 int proto = RTPROT_ZEBRA;
3213
3214 if (dplane_ctx_get_type(ctx) != 0)
3215 proto = zebra2proto(dplane_ctx_get_type(ctx));
d4d4ec1c 3216
67e3369e 3217 return netlink_neigh_update_msg_encode(
0a27a2fe
PG
3218 ctx, cmd, (const void *)&dst_mac, ETH_ALEN,
3219 dplane_ctx_neigh_get_ipaddr(ctx), false, PF_BRIDGE, 0, NTF_SELF,
3220 (NUD_NOARP | NUD_PERMANENT), 0 /*nhg*/, false /*nfy*/,
3221 0 /*nfy_flags*/, false /*ext*/, 0 /*ext_flags*/, buf, buflen,
88217099 3222 proto);
340845e2
JU
3223}
3224
/*
 * Fallback definition, used only when the system headers do not provide
 * NDA_RTA: yields the address just past the (aligned) struct ndmsg that
 * r points to, typed as the first struct rtattr.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
3229
2414abd3 3230static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 3231{
d62a17ae 3232 struct ndmsg *ndm;
3233 struct interface *ifp;
3234 struct zebra_if *zif;
d62a17ae 3235 struct rtattr *tb[NDA_MAX + 1];
3236 struct interface *br_if;
3237 struct ethaddr mac;
3238 vlanid_t vid = 0;
4b3f26f4 3239 struct in_addr vtep_ip;
d62a17ae 3240 int vid_present = 0, dst_present = 0;
d62a17ae 3241 char vid_buf[20];
3242 char dst_buf[30];
a37f4598 3243 bool sticky;
f188e68e
AK
3244 bool local_inactive = false;
3245 bool dp_static = false;
3246 uint32_t nhg_id = 0;
d62a17ae 3247
3248 ndm = NLMSG_DATA(h);
3249
2853fed6 3250 /* We only process macfdb notifications if EVPN is enabled */
3251 if (!is_evpn_enabled())
3252 return 0;
3253
4b3f26f4 3254 /* Parse attributes and extract fields of interest. Do basic
3255 * validation of the fields.
3256 */
4bcdb608
NA
3257 netlink_parse_rtattr_flags(tb, NDA_MAX, NDA_RTA(ndm), len,
3258 NLA_F_NESTED);
d62a17ae 3259
3260 if (!tb[NDA_LLADDR]) {
28bd0652 3261 if (IS_ZEBRA_DEBUG_KERNEL)
4b3f26f4 3262 zlog_debug("%s AF_BRIDGE IF %u - no LLADDR",
28bd0652 3263 nl_msg_type_to_str(h->nlmsg_type),
4b3f26f4 3264 ndm->ndm_ifindex);
d62a17ae 3265 return 0;
3266 }
3267
ff8b7eb8 3268 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
3269 if (IS_ZEBRA_DEBUG_KERNEL)
3270 zlog_debug(
4b3f26f4 3271 "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu",
3272 nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex,
28bd0652 3273 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
d62a17ae 3274 return 0;
3275 }
3276
ff8b7eb8 3277 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 3278
2be18df4 3279 if (tb[NDA_VLAN]) {
d62a17ae 3280 vid_present = 1;
d7c0a89a 3281 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
772270f3 3282 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
d62a17ae 3283 }
3284
3285 if (tb[NDA_DST]) {
3286 /* TODO: Only IPv4 supported now. */
3287 dst_present = 1;
4b3f26f4 3288 memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]),
d62a17ae 3289 IPV4_MAX_BYTELEN);
9bcef951
MS
3290 snprintfrr(dst_buf, sizeof(dst_buf), " dst %pI4",
3291 &vtep_ip);
d62a17ae 3292 }
3293
f188e68e
AK
3294 if (tb[NDA_NH_ID])
3295 nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]);
3296
3297 if (ndm->ndm_state & NUD_STALE)
3298 local_inactive = true;
3299
4bcdb608
NA
3300 if (tb[NDA_FDB_EXT_ATTRS]) {
3301 struct rtattr *attr = tb[NDA_FDB_EXT_ATTRS];
3302 struct rtattr *nfea_tb[NFEA_MAX + 1] = {0};
3303
3304 netlink_parse_rtattr_nested(nfea_tb, NFEA_MAX, attr);
3305 if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) {
3306 uint8_t nfy_flags;
f188e68e 3307
4bcdb608
NA
3308 nfy_flags = *(uint8_t *)RTA_DATA(
3309 nfea_tb[NFEA_ACTIVITY_NOTIFY]);
3310 if (nfy_flags & FDB_NOTIFY_BIT)
3311 dp_static = true;
3312 if (nfy_flags & FDB_NOTIFY_INACTIVE_BIT)
3313 local_inactive = true;
3314 }
f188e68e
AK
3315 }
3316
d62a17ae 3317 if (IS_ZEBRA_DEBUG_KERNEL)
ef7b8be4 3318 zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %pEA%s nhg %d",
d62a17ae 3319 nl_msg_type_to_str(h->nlmsg_type),
d62a17ae 3320 ndm->ndm_ifindex, vid_present ? vid_buf : "",
ef7b8be4 3321 ndm->ndm_state, ndm->ndm_flags, &mac,
f188e68e 3322 dst_present ? dst_buf : "", nhg_id);
d62a17ae 3323
4b3f26f4 3324 /* The interface should exist. */
3325 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
3326 ndm->ndm_ifindex);
3327 if (!ifp || !ifp->info)
3328 return 0;
3329
3330 /* The interface should be something we're interested in. */
3331 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
3332 return 0;
3333
3334 zif = (struct zebra_if *)ifp->info;
3335 if ((br_if = zif->brslave_info.br_if) == NULL) {
3336 if (IS_ZEBRA_DEBUG_KERNEL)
3337 zlog_debug(
3338 "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master",
3339 nl_msg_type_to_str(h->nlmsg_type), ifp->name,
3340 ndm->ndm_ifindex,
3341 zif->brslave_info.bridge_ifindex);
3342 return 0;
3343 }
3344
f188e68e 3345 sticky = !!(ndm->ndm_flags & NTF_STICKY);
4b3f26f4 3346
28bd0652
DS
3347 if (filter_vlan && vid != filter_vlan) {
3348 if (IS_ZEBRA_DEBUG_KERNEL)
d6951e5e 3349 zlog_debug(" Filtered due to filter vlan: %d",
28bd0652 3350 filter_vlan);
d62a17ae 3351 return 0;
28bd0652 3352 }
d62a17ae 3353
3354 /* If add or update, do accordingly if learnt on a "local" interface; if
3355 * the notification is over VxLAN, this has to be related to
3356 * multi-homing,
3357 * so perform an implicit delete of any local entry (if it exists).
3358 */
3359 if (h->nlmsg_type == RTM_NEWNEIGH) {
4b3f26f4 3360 /* Drop "permanent" entries. */
3361 if (ndm->ndm_state & NUD_PERMANENT) {
3362 if (IS_ZEBRA_DEBUG_KERNEL)
d6951e5e
DL
3363 zlog_debug(
3364 " Dropping entry because of NUD_PERMANENT");
3365 return 0;
4b3f26f4 3366 }
3367
d62a17ae 3368 if (IS_ZEBRA_IF_VXLAN(ifp))
15400f95
AK
3369 return zebra_vxlan_dp_network_mac_add(
3370 ifp, br_if, &mac, vid, nhg_id, sticky,
3371 !!(ndm->ndm_flags & NTF_EXT_LEARNED));
d62a17ae 3372
3373 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
f188e68e 3374 sticky, local_inactive, dp_static);
d62a17ae 3375 }
3376
3377 /* This is a delete notification.
4b3f26f4 3378 * Ignore the notification with IP dest as it may just signify that the
3379 * MAC has moved from remote to local. The exception is the special
3380 * all-zeros MAC that represents the BUM flooding entry; we may have
3381 * to readd it. Otherwise,
d62a17ae 3382 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
3383 * 2. For a MAC over "local" interface, delete the mac
3384 * Note: We will get notifications from both bridge driver and VxLAN
3385 * driver.
d62a17ae 3386 */
f188e68e
AK
3387 if (nhg_id)
3388 return 0;
3389
28bd0652 3390 if (dst_present) {
4b3f26f4 3391 u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
3392
3393 if (!memcmp(zero_mac, mac.octet, ETH_ALEN))
3394 return zebra_vxlan_check_readd_vtep(ifp, vtep_ip);
d62a17ae 3395 return 0;
28bd0652 3396 }
d62a17ae 3397
3398 if (IS_ZEBRA_IF_VXLAN(ifp))
15400f95 3399 return zebra_vxlan_dp_network_mac_del(ifp, br_if, &mac, vid);
d62a17ae 3400
3401 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
2232a77c 3402}
3403
2414abd3 3404static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 3405{
d62a17ae 3406 int len;
3407 struct ndmsg *ndm;
2232a77c 3408
d62a17ae 3409 if (h->nlmsg_type != RTM_NEWNEIGH)
3410 return 0;
2232a77c 3411
d62a17ae 3412 /* Length validity. */
3413 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3414 if (len < 0)
3415 return -1;
2232a77c 3416
d62a17ae 3417 /* We are interested only in AF_BRIDGE notifications. */
3418 ndm = NLMSG_DATA(h);
3419 if (ndm->ndm_family != AF_BRIDGE)
3420 return 0;
2232a77c 3421
2414abd3 3422 return netlink_macfdb_change(h, len, ns_id);
2232a77c 3423}
3424
3425/* Request for MAC FDB information from the kernel */
85a75f1e
MS
3426static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
3427 int type, ifindex_t master_ifindex)
2232a77c 3428{
d62a17ae 3429 struct {
3430 struct nlmsghdr n;
3431 struct ifinfomsg ifm;
3432 char buf[256];
3433 } req;
3434
3435 /* Form the request, specifying filter (rtattr) if needed. */
3436 memset(&req, 0, sizeof(req));
3437 req.n.nlmsg_type = type;
718f9b0f 3438 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 3439 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
3440 req.ifm.ifi_family = family;
3441 if (master_ifindex)
312a6bee 3442 nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
d62a17ae 3443
fd3f8e52 3444 return netlink_request(netlink_cmd, &req);
2232a77c 3445}
3446
3447/*
3448 * MAC forwarding database read using netlink interface. This is invoked
3449 * at startup.
3450 */
d62a17ae 3451int netlink_macfdb_read(struct zebra_ns *zns)
2232a77c 3452{
d62a17ae 3453 int ret;
85a75f1e
MS
3454 struct zebra_dplane_info dp_info;
3455
3456 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 3457
3458 /* Get bridge FDB table. */
85a75f1e
MS
3459 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3460 0);
d62a17ae 3461 if (ret < 0)
3462 return ret;
3463 /* We are reading entire table. */
3464 filter_vlan = 0;
85a75f1e 3465 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
9bfadae8 3466 &dp_info, 0, true);
d62a17ae 3467
3468 return ret;
2232a77c 3469}
3470
3471/*
3472 * MAC forwarding database read using netlink interface. This is for a
3473 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
3474 */
d62a17ae 3475int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
3476 struct interface *br_if)
2232a77c 3477{
d62a17ae 3478 struct zebra_if *br_zif;
3479 struct zebra_if *zif;
3480 struct zebra_l2info_vxlan *vxl;
85a75f1e 3481 struct zebra_dplane_info dp_info;
d62a17ae 3482 int ret = 0;
3483
85a75f1e 3484 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 3485
3486 /* Save VLAN we're filtering on, if needed. */
3487 br_zif = (struct zebra_if *)br_if->info;
3488 zif = (struct zebra_if *)ifp->info;
3489 vxl = &zif->l2info.vxl;
3490 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
3491 filter_vlan = vxl->access_vlan;
3492
3493 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3494 */
85a75f1e 3495 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
d62a17ae 3496 br_if->ifindex);
3497 if (ret < 0)
3498 return ret;
85a75f1e 3499 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
9bfadae8 3500 &dp_info, 0, false);
d62a17ae 3501
3502 /* Reset VLAN filter. */
3503 filter_vlan = 0;
3504 return ret;
2232a77c 3505}
3506
67fb9374
CS
3507
3508/* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
3509static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
1a3bd37f 3510 int family, int type,
67fb9374 3511 struct interface *br_if,
1a3bd37f 3512 const struct ethaddr *mac,
67fb9374
CS
3513 vlanid_t vid)
3514{
3515 struct {
3516 struct nlmsghdr n;
3517 struct ndmsg ndm;
3518 char buf[256];
3519 } req;
3520 struct zebra_if *br_zif;
67fb9374
CS
3521
3522 memset(&req, 0, sizeof(req));
3523 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3524 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3525 req.n.nlmsg_flags = NLM_F_REQUEST;
3526 req.ndm.ndm_family = family; /* AF_BRIDGE */
3527 /* req.ndm.ndm_state = NUD_REACHABLE; */
3528
312a6bee 3529 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
67fb9374
CS
3530
3531 br_zif = (struct zebra_if *)br_if->info;
3532 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
312a6bee 3533 nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid);
67fb9374 3534
312a6bee 3535 nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
67fb9374
CS
3536
3537 if (IS_ZEBRA_DEBUG_KERNEL)
bd47f3a3 3538 zlog_debug(
ef7b8be4 3539 "%s: Tx family %s IF %s(%u) vrf %s(%u) MAC %pEA vid %u",
bd47f3a3 3540 __func__, nl_family_to_str(req.ndm.ndm_family),
096f7609
IR
3541 br_if->name, br_if->ifindex, br_if->vrf->name,
3542 br_if->vrf->vrf_id, mac, vid);
67fb9374 3543
fd3f8e52 3544 return netlink_request(&zns->netlink_cmd, &req);
67fb9374
CS
3545}
3546
3547int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
3548 struct interface *br_if,
1a3bd37f 3549 const struct ethaddr *mac, vlanid_t vid)
67fb9374
CS
3550{
3551 int ret = 0;
3552 struct zebra_dplane_info dp_info;
3553
3554 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3555
3556 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3557 */
3558 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
3559 RTM_GETNEIGH,
3560 br_if, mac, vid);
3561 if (ret < 0)
3562 return ret;
3563
3564 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
9bfadae8 3565 &dp_info, 1, false);
67fb9374
CS
3566
3567 return ret;
3568}
036d93c0
MS
3569
3570/*
3571 * Netlink-specific handler for MAC updates using dataplane context object.
3572 */
67e3369e
JU
3573ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, void *data,
3574 size_t datalen)
2232a77c 3575{
340845e2 3576 struct ipaddr vtep_ip;
036d93c0 3577 vlanid_t vid;
d4d4ec1c
RZ
3578 ssize_t total;
3579 int cmd;
340845e2
JU
3580 uint8_t flags;
3581 uint16_t state;
506efd37 3582 uint32_t nhg_id;
f188e68e
AK
3583 uint32_t update_flags;
3584 bool nfy = false;
3585 uint8_t nfy_flags = 0;
88217099
PG
3586 int proto = RTPROT_ZEBRA;
3587
3588 if (dplane_ctx_get_type(ctx) != 0)
3589 proto = zebra2proto(dplane_ctx_get_type(ctx));
d4d4ec1c
RZ
3590
3591 cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL
3592 ? RTM_NEWNEIGH : RTM_DELNEIGH;
036d93c0 3593
f188e68e 3594 flags = NTF_MASTER;
340845e2 3595 state = NUD_REACHABLE;
d62a17ae 3596
f188e68e
AK
3597 update_flags = dplane_ctx_mac_get_update_flags(ctx);
3598 if (update_flags & DPLANE_MAC_REMOTE) {
3599 flags |= NTF_SELF;
60e372e9
AK
3600 if (dplane_ctx_mac_is_sticky(ctx)) {
3601 /* NUD_NOARP prevents the entry from expiring */
3602 state |= NUD_NOARP;
3603 /* sticky the entry from moving */
f188e68e 3604 flags |= NTF_STICKY;
60e372e9 3605 } else {
f188e68e 3606 flags |= NTF_EXT_LEARNED;
60e372e9 3607 }
f188e68e
AK
3608 /* if it was static-local previously we need to clear the
3609 * notify flags on replace with remote
3610 */
3611 if (update_flags & DPLANE_MAC_WAS_STATIC)
3612 nfy = true;
3613 } else {
3614 /* local mac */
3615 if (update_flags & DPLANE_MAC_SET_STATIC) {
4bcdb608 3616 nfy_flags |= FDB_NOTIFY_BIT;
f188e68e
AK
3617 state |= NUD_NOARP;
3618 }
3619
3620 if (update_flags & DPLANE_MAC_SET_INACTIVE)
4bcdb608 3621 nfy_flags |= FDB_NOTIFY_INACTIVE_BIT;
f188e68e
AK
3622
3623 nfy = true;
3624 }
478566d6 3625
506efd37 3626 nhg_id = dplane_ctx_mac_get_nhg_id(ctx);
340845e2
JU
3627 vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx));
3628 SET_IPADDR_V4(&vtep_ip);
d62a17ae 3629
036d93c0 3630 if (IS_ZEBRA_DEBUG_KERNEL) {
478566d6 3631 char vid_buf[20];
506efd37 3632 const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
478566d6 3633
340845e2
JU
3634 vid = dplane_ctx_mac_get_vlan(ctx);
3635 if (vid > 0)
478566d6
MS
3636 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
3637 else
3638 vid_buf[0] = '\0';
036d93c0 3639
ccd187cd 3640 zlog_debug(
ef7b8be4 3641 "Tx %s family %s IF %s(%u)%s %sMAC %pEA dst %pIA nhg %u%s%s%s%s%s",
ccd187cd
AK
3642 nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE),
3643 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
3644 vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
ef7b8be4 3645 mac, &vtep_ip, nhg_id,
ccd187cd
AK
3646 (update_flags & DPLANE_MAC_REMOTE) ? " rem" : "",
3647 (update_flags & DPLANE_MAC_WAS_STATIC) ? " clr_sync"
3648 : "",
3649 (update_flags & DPLANE_MAC_SET_STATIC) ? " static" : "",
3650 (update_flags & DPLANE_MAC_SET_INACTIVE) ? " inactive"
3651 : "",
3652 nfy ? " nfy" : "");
036d93c0 3653 }
d62a17ae 3654
0be6e7d7 3655 total = netlink_neigh_update_msg_encode(
0a27a2fe
PG
3656 ctx, cmd, (const void *)dplane_ctx_mac_get_addr(ctx), ETH_ALEN,
3657 &vtep_ip, true, AF_BRIDGE, 0, flags, state, nhg_id, nfy,
3658 nfy_flags, false /*ext*/, 0 /*ext_flags*/, data, datalen,
88217099 3659 proto);
d4d4ec1c
RZ
3660
3661 return total;
2232a77c 3662}
3663
f17b99ed
DS
3664/*
3665 * In the event the kernel deletes ipv4 link-local neighbor entries created for
3666 * 5549 support, re-install them.
3667 */
3668static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
9b036974
DS
3669 struct interface *ifp, struct ipaddr *ip,
3670 bool handle_failed)
f17b99ed
DS
3671{
3672 if (ndm->ndm_family != AF_INET)
3673 return;
3674
3675 if (!zif->v6_2_v4_ll_neigh_entry)
3676 return;
3677
3678 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
3679 return;
3680
9b036974
DS
3681 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
3682 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
3683 ifp->name);
3684 return;
3685 }
3686
f17b99ed
DS
3687 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
3688}
3689
/*
 * Neighbor state masks used by the IP neighbor handling in this file:
 * NUD_VALID selects states for which a neighbor is processed as an add or
 * update rather than a delete; NUD_LOCAL_ACTIVE is the subset whose absence
 * marks a neighbor as locally inactive when EVPN-MH advertises reachable
 * neighbors only.
 */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE     \
	 | NUD_DELAY)
#define NUD_LOCAL_ACTIVE                                                       \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE)
80f6b5fa
PG
/*
 * Translate a netlink neighbor state (the caller passes ndm_state) into the
 * value sent to zclients.  The two encodings are treated as numerically
 * identical, so the input is returned unchanged:
 * - netlink neighbor state values (see linux/neighbour.h)
 * - zclient neighbor state values: ZEBRA_NEIGH_STATE_XXX
 *   (see lib/zclient.h)
 */
static int netlink_nbr_entry_state_to_zclient(int nbr_state)
{
	int zclient_state = nbr_state;

	return zclient_state;
}
2414abd3 3705static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 3706{
d62a17ae 3707 struct ndmsg *ndm;
3708 struct interface *ifp;
3709 struct zebra_if *zif;
d62a17ae 3710 struct rtattr *tb[NDA_MAX + 1];
3711 struct interface *link_if;
3712 struct ethaddr mac;
3713 struct ipaddr ip;
3714 char buf[ETHER_ADDR_STRLEN];
d62a17ae 3715 int mac_present = 0;
a37f4598 3716 bool is_ext;
3717 bool is_router;
f188e68e 3718 bool local_inactive;
7c0e4dc6
AK
3719 uint32_t ext_flags = 0;
3720 bool dp_static = false;
7723e8d3
PG
3721 int l2_len = 0;
3722 int cmd;
d62a17ae 3723
3724 ndm = NLMSG_DATA(h);
3725
3726 /* The interface should exist. */
5895d33f 3727 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
d62a17ae 3728 ndm->ndm_ifindex);
2853fed6 3729 if (!ifp || !ifp->info)
d62a17ae 3730 return 0;
3731
20089ae2
DS
3732 zif = (struct zebra_if *)ifp->info;
3733
3734 /* Parse attributes and extract fields of interest. */
20089ae2
DS
3735 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
3736
3737 if (!tb[NDA_DST]) {
bd47f3a3 3738 zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST",
9df414fe
QY
3739 nl_msg_type_to_str(h->nlmsg_type),
3740 nl_family_to_str(ndm->ndm_family), ifp->name,
096f7609 3741 ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id);
d62a17ae 3742 return 0;
20089ae2
DS
3743 }
3744
3745 memset(&ip, 0, sizeof(struct ipaddr));
3746 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
3747 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
3748
f17b99ed
DS
3749 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
3750 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
9b036974 3751 netlink_handle_5549(ndm, zif, ifp, &ip, false);
28bd0652
DS
3752 if (IS_ZEBRA_DEBUG_KERNEL)
3753 zlog_debug(
1d5453d6 3754 " Neighbor Entry Received is a 5549 entry, finished");
20089ae2
DS
3755 return 0;
3756 }
d62a17ae 3757
f17b99ed 3758 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
9b036974
DS
3759 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
3760 netlink_handle_5549(ndm, zif, ifp, &ip, true);
f17b99ed 3761
7723e8d3
PG
3762 /* we send link layer information to client:
3763 * - nlmsg_type = RTM_DELNEIGH|NEWNEIGH|GETNEIGH
3764 * - struct ipaddr ( for DEL and GET)
3765 * - struct ethaddr mac; (for NEW)
3766 */
3767 if (h->nlmsg_type == RTM_NEWNEIGH)
3768 cmd = ZEBRA_NHRP_NEIGH_ADDED;
3769 else if (h->nlmsg_type == RTM_GETNEIGH)
3770 cmd = ZEBRA_NHRP_NEIGH_GET;
3771 else if (h->nlmsg_type == RTM_DELNEIGH)
3772 cmd = ZEBRA_NHRP_NEIGH_REMOVED;
3773 else {
3774 zlog_debug("%s(): unknown nlmsg type %u", __func__,
3775 h->nlmsg_type);
3776 return 0;
3777 }
3778 if (tb[NDA_LLADDR]) {
3779 /* copy LLADDR information */
3780 l2_len = RTA_PAYLOAD(tb[NDA_LLADDR]);
7723e8d3 3781 }
d603c077
PG
3782 if (l2_len == IPV4_MAX_BYTELEN || l2_len == 0) {
3783 union sockunion link_layer_ipv4;
3784
3785 if (l2_len) {
3786 sockunion_family(&link_layer_ipv4) = AF_INET;
3787 memcpy((void *)sockunion_get_addr(&link_layer_ipv4),
b7c21fad 3788 RTA_DATA(tb[NDA_LLADDR]), l2_len);
d603c077
PG
3789 } else
3790 sockunion_family(&link_layer_ipv4) = AF_UNSPEC;
80f6b5fa
PG
3791 zsend_nhrp_neighbor_notify(
3792 cmd, ifp, &ip,
3793 netlink_nbr_entry_state_to_zclient(ndm->ndm_state),
3794 &link_layer_ipv4);
d603c077 3795 }
7723e8d3
PG
3796
3797 if (h->nlmsg_type == RTM_GETNEIGH)
3798 return 0;
3799
d62a17ae 3800 /* The neighbor is present on an SVI. From this, we locate the
3801 * underlying
3802 * bridge because we're only interested in neighbors on a VxLAN bridge.
3803 * The bridge is located based on the nature of the SVI:
3804 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
3805 * interface
3806 * and is linked to the bridge
3807 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
3819e4ce 3808 * interface
d62a17ae 3809 * itself
3810 */
3811 if (IS_ZEBRA_IF_VLAN(ifp)) {
5895d33f 3812 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
71349e03 3813 zif->link_ifindex);
d62a17ae 3814 if (!link_if)
3815 return 0;
3816 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
3817 link_if = ifp;
28bd0652
DS
3818 else {
3819 if (IS_ZEBRA_DEBUG_KERNEL)
3820 zlog_debug(
1d5453d6 3821 " Neighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
d62a17ae 3822 return 0;
28bd0652 3823 }
d62a17ae 3824
d62a17ae 3825 memset(&mac, 0, sizeof(struct ethaddr));
d62a17ae 3826 if (h->nlmsg_type == RTM_NEWNEIGH) {
3827 if (tb[NDA_LLADDR]) {
ff8b7eb8 3828 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
3829 if (IS_ZEBRA_DEBUG_KERNEL)
3830 zlog_debug(
bd47f3a3 3831 "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu",
28bd0652
DS
3832 nl_msg_type_to_str(
3833 h->nlmsg_type),
3834 nl_family_to_str(
3835 ndm->ndm_family),
3836 ifp->name, ndm->ndm_ifindex,
096f7609
IR
3837 ifp->vrf->name,
3838 ifp->vrf->vrf_id,
28bd0652
DS
3839 (unsigned long)RTA_PAYLOAD(
3840 tb[NDA_LLADDR]));
d62a17ae 3841 return 0;
3842 }
3843
3844 mac_present = 1;
ff8b7eb8 3845 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 3846 }
3847
a37f4598 3848 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
3849 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
d62a17ae 3850
7c0e4dc6
AK
3851 if (tb[NDA_EXT_FLAGS]) {
3852 ext_flags = *(uint32_t *)RTA_DATA(tb[NDA_EXT_FLAGS]);
3853 if (ext_flags & NTF_E_MH_PEER_SYNC)
3854 dp_static = true;
3855 }
3856
d62a17ae 3857 if (IS_ZEBRA_DEBUG_KERNEL)
3858 zlog_debug(
ef7b8be4 3859 "Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA MAC %s state 0x%x flags 0x%x ext_flags 0x%x",
d62a17ae 3860 nl_msg_type_to_str(h->nlmsg_type),
3861 nl_family_to_str(ndm->ndm_family), ifp->name,
096f7609
IR
3862 ndm->ndm_ifindex, ifp->vrf->name,
3863 ifp->vrf->vrf_id, &ip,
d62a17ae 3864 mac_present
3865 ? prefix_mac2str(&mac, buf, sizeof(buf))
3866 : "",
7c0e4dc6 3867 ndm->ndm_state, ndm->ndm_flags, ext_flags);
d62a17ae 3868
3869 /* If the neighbor state is valid for use, process as an add or
3870 * update
3871 * else process as a delete. Note that the delete handling may
3872 * result
3873 * in re-adding the neighbor if it is a valid "remote" neighbor.
3874 */
f188e68e 3875 if (ndm->ndm_state & NUD_VALID) {
c7bfd085
AK
3876 if (zebra_evpn_mh_do_adv_reachable_neigh_only())
3877 local_inactive =
3878 !(ndm->ndm_state & NUD_LOCAL_ACTIVE);
3879 else
3880 /* If EVPN-MH is not enabled we treat STALE
3881 * neighbors as locally-active and advertise
3882 * them
3883 */
3884 local_inactive = false;
f188e68e 3885
ee69da27 3886 return zebra_vxlan_handle_kernel_neigh_update(
7c0e4dc6
AK
3887 ifp, link_if, &ip, &mac, ndm->ndm_state, is_ext,
3888 is_router, local_inactive, dp_static);
f188e68e 3889 }
d62a17ae 3890
ee69da27 3891 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
d62a17ae 3892 }
3893
3894 if (IS_ZEBRA_DEBUG_KERNEL)
ef7b8be4 3895 zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA",
d62a17ae 3896 nl_msg_type_to_str(h->nlmsg_type),
3897 nl_family_to_str(ndm->ndm_family), ifp->name,
096f7609 3898 ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id,
ef7b8be4 3899 &ip);
d62a17ae 3900
3901 /* Process the delete - it may result in re-adding the neighbor if it is
3902 * a valid "remote" neighbor.
3903 */
ee69da27 3904 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
2232a77c 3905}
3906
2414abd3 3907static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 3908{
d62a17ae 3909 int len;
3910 struct ndmsg *ndm;
2232a77c 3911
d62a17ae 3912 if (h->nlmsg_type != RTM_NEWNEIGH)
3913 return 0;
2232a77c 3914
d62a17ae 3915 /* Length validity. */
3916 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3917 if (len < 0)
3918 return -1;
2232a77c 3919
d62a17ae 3920 /* We are interested only in AF_INET or AF_INET6 notifications. */
3921 ndm = NLMSG_DATA(h);
3922 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
3923 return 0;
2232a77c 3924
2414abd3 3925 return netlink_neigh_change(h, len);
2232a77c 3926}
3927
3928/* Request for IP neighbor information from the kernel */
85a75f1e
MS
3929static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
3930 int type, ifindex_t ifindex)
2232a77c 3931{
d62a17ae 3932 struct {
3933 struct nlmsghdr n;
3934 struct ndmsg ndm;
3935 char buf[256];
3936 } req;
3937
3938 /* Form the request, specifying filter (rtattr) if needed. */
3939 memset(&req, 0, sizeof(req));
3940 req.n.nlmsg_type = type;
718f9b0f 3941 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 3942 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3943 req.ndm.ndm_family = family;
3944 if (ifindex)
312a6bee 3945 nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
d62a17ae 3946
fd3f8e52 3947 return netlink_request(netlink_cmd, &req);
2232a77c 3948}
3949
3950/*
3951 * IP Neighbor table read using netlink interface. This is invoked
3952 * at startup.
3953 */
d62a17ae 3954int netlink_neigh_read(struct zebra_ns *zns)
2232a77c 3955{
d62a17ae 3956 int ret;
85a75f1e
MS
3957 struct zebra_dplane_info dp_info;
3958
3959 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 3960
d62a17ae 3961 /* Get IP neighbor table. */
85a75f1e
MS
3962 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
3963 0);
d62a17ae 3964 if (ret < 0)
3965 return ret;
85a75f1e 3966 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
9bfadae8 3967 &dp_info, 0, true);
2232a77c 3968
d62a17ae 3969 return ret;
2232a77c 3970}
3971
3972/*
3973 * IP Neighbor table read using netlink interface. This is for a specific
3974 * VLAN device.
3975 */
d62a17ae 3976int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2232a77c 3977{
d62a17ae 3978 int ret = 0;
85a75f1e
MS
3979 struct zebra_dplane_info dp_info;
3980
3981 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 3982
85a75f1e 3983 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
d62a17ae 3984 vlan_if->ifindex);
3985 if (ret < 0)
3986 return ret;
85a75f1e 3987 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
9bfadae8 3988 &dp_info, 0, false);
2232a77c 3989
d62a17ae 3990 return ret;
2232a77c 3991}
3992
67fb9374
CS
3993/*
3994 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
3995 * read using netlink interface.
3996 */
3997static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
1a3bd37f
MS
3998 int type,
3999 const struct ipaddr *ip,
67fb9374
CS
4000 ifindex_t ifindex)
4001{
4002 struct {
4003 struct nlmsghdr n;
4004 struct ndmsg ndm;
4005 char buf[256];
4006 } req;
4007 int ipa_len;
4008
4009 /* Form the request, specifying filter (rtattr) if needed. */
4010 memset(&req, 0, sizeof(req));
4011 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
4012 req.n.nlmsg_flags = NLM_F_REQUEST;
4013 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
4014 req.ndm.ndm_ifindex = ifindex;
4015
4016 if (IS_IPADDR_V4(ip)) {
4017 ipa_len = IPV4_MAX_BYTELEN;
4018 req.ndm.ndm_family = AF_INET;
4019
4020 } else {
4021 ipa_len = IPV6_MAX_BYTELEN;
4022 req.ndm.ndm_family = AF_INET6;
4023 }
4024
312a6bee 4025 nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
67fb9374 4026
ef7b8be4
DL
4027 if (IS_ZEBRA_DEBUG_KERNEL)
4028 zlog_debug("%s: Tx %s family %s IF %u IP %pIA flags 0x%x",
7c26c121 4029 __func__, nl_msg_type_to_str(type),
ef7b8be4
DL
4030 nl_family_to_str(req.ndm.ndm_family), ifindex, ip,
4031 req.n.nlmsg_flags);
7c26c121 4032
fd3f8e52 4033 return netlink_request(&zns->netlink_cmd, &req);
67fb9374
CS
4034}
4035
1a3bd37f
MS
4036int netlink_neigh_read_specific_ip(const struct ipaddr *ip,
4037 struct interface *vlan_if)
67fb9374
CS
4038{
4039 int ret = 0;
4040 struct zebra_ns *zns;
096f7609 4041 struct zebra_vrf *zvrf = vlan_if->vrf->info;
67fb9374
CS
4042 struct zebra_dplane_info dp_info;
4043
4044 zns = zvrf->zns;
4045
4046 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4047
4048 if (IS_ZEBRA_DEBUG_KERNEL)
ef7b8be4
DL
4049 zlog_debug("%s: neigh request IF %s(%u) IP %pIA vrf %s(%u)",
4050 __func__, vlan_if->name, vlan_if->ifindex, ip,
096f7609 4051 vlan_if->vrf->name, vlan_if->vrf->vrf_id);
67fb9374
CS
4052
4053 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
4054 vlan_if->ifindex);
4055 if (ret < 0)
4056 return ret;
4057
4058 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
9bfadae8 4059 &dp_info, 1, false);
67fb9374
CS
4060
4061 return ret;
4062}
4063
2414abd3 4064int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
2232a77c 4065{
d62a17ae 4066 int len;
4067 struct ndmsg *ndm;
2232a77c 4068
7723e8d3
PG
4069 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH
4070 || h->nlmsg_type == RTM_GETNEIGH))
d62a17ae 4071 return 0;
2232a77c 4072
d62a17ae 4073 /* Length validity. */
4074 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
9bdf8618 4075 if (len < 0) {
15569c58
DA
4076 zlog_err(
4077 "%s: Message received from netlink is of a broken size %d %zu",
4078 __func__, h->nlmsg_len,
4079 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
d62a17ae 4080 return -1;
9bdf8618 4081 }
2232a77c 4082
d62a17ae 4083 /* Is this a notification for the MAC FDB or IP neighbor table? */
4084 ndm = NLMSG_DATA(h);
4085 if (ndm->ndm_family == AF_BRIDGE)
2414abd3 4086 return netlink_macfdb_change(h, len, ns_id);
2232a77c 4087
d62a17ae 4088 if (ndm->ndm_type != RTN_UNICAST)
4089 return 0;
2232a77c 4090
d62a17ae 4091 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2414abd3 4092 return netlink_ipneigh_change(h, len, ns_id);
8a1b681c 4093 else {
9df414fe 4094 flog_warn(
e914ccbe 4095 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
4096 "Invalid address family: %u received from kernel neighbor change: %s",
4097 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
4098 return 0;
4099 }
2232a77c 4100
d62a17ae 4101 return 0;
2232a77c 4102}
4103
931fa60c
MS
4104/*
4105 * Utility neighbor-update function, using info from dplane context.
4106 */
67e3369e
JU
4107static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
4108 int cmd, void *buf, size_t buflen)
2232a77c 4109{
931fa60c 4110 const struct ipaddr *ip;
0a27a2fe
PG
4111 const struct ethaddr *mac = NULL;
4112 const struct ipaddr *link_ip = NULL;
4113 const void *link_ptr = NULL;
4114 char buf2[ETHER_ADDR_STRLEN];
4115
4116 int llalen;
931fa60c
MS
4117 uint8_t flags;
4118 uint16_t state;
340845e2 4119 uint8_t family;
ccd187cd
AK
4120 uint32_t update_flags;
4121 uint32_t ext_flags = 0;
4122 bool ext = false;
88217099
PG
4123 int proto = RTPROT_ZEBRA;
4124
4125 if (dplane_ctx_get_type(ctx) != 0)
4126 proto = zebra2proto(dplane_ctx_get_type(ctx));
d62a17ae 4127
931fa60c 4128 ip = dplane_ctx_neigh_get_ipaddr(ctx);
931fa60c 4129
0a27a2fe
PG
4130 if (dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_INSTALL
4131 || dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_DELETE) {
4132 link_ip = dplane_ctx_neigh_get_link_ip(ctx);
4133 llalen = IPADDRSZ(link_ip);
4134 link_ptr = (const void *)&(link_ip->ip.addr);
4135 ipaddr2str(link_ip, buf2, sizeof(buf2));
4136 } else {
4137 mac = dplane_ctx_neigh_get_mac(ctx);
4138 llalen = ETH_ALEN;
4139 link_ptr = (const void *)mac;
4140 if (is_zero_mac(mac))
4141 mac = NULL;
4142 if (mac)
4143 prefix_mac2str(mac, buf2, sizeof(buf2));
4144 else
4145 snprintf(buf2, sizeof(buf2), "null");
4146 }
ccd187cd 4147 update_flags = dplane_ctx_neigh_get_update_flags(ctx);
931fa60c
MS
4148 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
4149 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
4150
340845e2 4151 family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
d62a17ae 4152
ccd187cd
AK
4153 if (update_flags & DPLANE_NEIGH_REMOTE) {
4154 flags |= NTF_EXT_LEARNED;
4155 /* if it was static-local previously we need to clear the
4156 * ext flags on replace with remote
4157 */
4158 if (update_flags & DPLANE_NEIGH_WAS_STATIC)
4159 ext = true;
0a27a2fe 4160 } else if (!(update_flags & DPLANE_NEIGH_NO_EXTENSION)) {
ccd187cd
AK
4161 ext = true;
4162 /* local neigh */
4163 if (update_flags & DPLANE_NEIGH_SET_STATIC)
4164 ext_flags |= NTF_E_MH_PEER_SYNC;
ccd187cd 4165 }
ef7b8be4 4166 if (IS_ZEBRA_DEBUG_KERNEL)
340845e2 4167 zlog_debug(
0a27a2fe 4168 "Tx %s family %s IF %s(%u) Neigh %pIA %s %s flags 0x%x state 0x%x %sext_flags 0x%x",
340845e2
JU
4169 nl_msg_type_to_str(cmd), nl_family_to_str(family),
4170 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
0a27a2fe
PG
4171 ip, link_ip ? "Link " : "MAC ", buf2, flags, state,
4172 ext ? "ext " : "", ext_flags);
d62a17ae 4173
18f60fe9 4174 return netlink_neigh_update_msg_encode(
0a27a2fe
PG
4175 ctx, cmd, link_ptr, llalen, ip, true, family, RTN_UNICAST,
4176 flags, state, 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, ext,
88217099 4177 ext_flags, buf, buflen, proto);
2232a77c 4178}
4179
e18747a9
PG
4180static int netlink_neigh_table_update_ctx(const struct zebra_dplane_ctx *ctx,
4181 void *data, size_t datalen)
4182{
4183 struct {
4184 struct nlmsghdr n;
4185 struct ndtmsg ndtm;
4186 char buf[];
4187 } *req = data;
4188 struct rtattr *nest;
4189 uint8_t family;
4190 ifindex_t idx;
4191 uint32_t val;
4192
4193 if (datalen < sizeof(*req))
4194 return 0;
4195 memset(req, 0, sizeof(*req));
4196 family = dplane_ctx_neightable_get_family(ctx);
4197 idx = dplane_ctx_get_ifindex(ctx);
4198
4199 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg));
4200 req->n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE;
4201 req->n.nlmsg_type = RTM_SETNEIGHTBL;
4202 req->ndtm.ndtm_family = family;
4203
4204 nl_attr_put(&req->n, datalen, NDTA_NAME,
4205 family == AF_INET ? "arp_cache" : "ndisc_cache", 10);
4206 nest = nl_attr_nest(&req->n, datalen, NDTA_PARMS);
4207 if (nest == NULL)
4208 return 0;
4209 if (!nl_attr_put(&req->n, datalen, NDTPA_IFINDEX, &idx, sizeof(idx)))
4210 return 0;
4211 val = dplane_ctx_neightable_get_app_probes(ctx);
4212 if (!nl_attr_put(&req->n, datalen, NDTPA_APP_PROBES, &val, sizeof(val)))
4213 return 0;
4214 val = dplane_ctx_neightable_get_mcast_probes(ctx);
4215 if (!nl_attr_put(&req->n, datalen, NDTPA_MCAST_PROBES, &val,
4216 sizeof(val)))
4217 return 0;
4218 val = dplane_ctx_neightable_get_ucast_probes(ctx);
4219 if (!nl_attr_put(&req->n, datalen, NDTPA_UCAST_PROBES, &val,
4220 sizeof(val)))
4221 return 0;
4222 nl_attr_nest_end(&req->n, nest);
4223
4224 return NLMSG_ALIGN(req->n.nlmsg_len);
4225}
4226
67e3369e
JU
4227static ssize_t netlink_neigh_msg_encoder(struct zebra_dplane_ctx *ctx,
4228 void *buf, size_t buflen)
2232a77c 4229{
67e3369e 4230 ssize_t ret;
2232a77c 4231
931fa60c
MS
4232 switch (dplane_ctx_get_op(ctx)) {
4233 case DPLANE_OP_NEIGH_INSTALL:
4234 case DPLANE_OP_NEIGH_UPDATE:
d68e74b4 4235 case DPLANE_OP_NEIGH_DISCOVER:
0a27a2fe 4236 case DPLANE_OP_NEIGH_IP_INSTALL:
67e3369e 4237 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH, buf, buflen);
931fa60c
MS
4238 break;
4239 case DPLANE_OP_NEIGH_DELETE:
0a27a2fe 4240 case DPLANE_OP_NEIGH_IP_DELETE:
67e3369e 4241 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH, buf, buflen);
931fa60c 4242 break;
0bbd4ff4 4243 case DPLANE_OP_VTEP_ADD:
67e3369e
JU
4244 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH, buf,
4245 buflen);
0bbd4ff4
MS
4246 break;
4247 case DPLANE_OP_VTEP_DELETE:
67e3369e
JU
4248 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH, buf,
4249 buflen);
0bbd4ff4 4250 break;
e18747a9
PG
4251 case DPLANE_OP_NEIGH_TABLE_UPDATE:
4252 ret = netlink_neigh_table_update_ctx(ctx, buf, buflen);
4253 break;
931fa60c 4254 default:
67e3369e 4255 ret = -1;
931fa60c 4256 }
2232a77c 4257
67e3369e
JU
4258 return ret;
4259}
4260
4261/*
4262 * Update MAC, using dataplane context object.
4263 */
4264
67e3369e
JU
4265enum netlink_msg_status netlink_put_mac_update_msg(struct nl_batch *bth,
4266 struct zebra_dplane_ctx *ctx)
4267{
4268 return netlink_batch_add_msg(bth, ctx, netlink_macfdb_update_ctx,
4269 false);
4270}
4271
67e3369e
JU
4272enum netlink_msg_status
4273netlink_put_neigh_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
4274{
4275 return netlink_batch_add_msg(bth, ctx, netlink_neigh_msg_encoder,
4276 false);
6fe2b0e6
CS
4277}
4278
16c628de
MS
4279/*
4280 * MPLS label forwarding table change via netlink interface, using dataplane
4281 * context information.
4282 */
0be6e7d7
JU
4283ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
4284 void *buf, size_t buflen)
16c628de
MS
4285{
4286 mpls_lse_t lse;
ee70f629 4287 const struct nhlfe_list_head *head;
f2595bd5 4288 const struct zebra_nhlfe *nhlfe;
16c628de
MS
4289 struct nexthop *nexthop = NULL;
4290 unsigned int nexthop_num;
4291 const char *routedesc;
4292 int route_type;
9a0132a5 4293 struct prefix p = {0};
d4000d7b
DS
4294 struct nlsock *nl =
4295 kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
16c628de
MS
4296
4297 struct {
4298 struct nlmsghdr n;
4299 struct rtmsg r;
0be6e7d7
JU
4300 char buf[0];
4301 } *req = buf;
4302
4303 if (buflen < sizeof(*req))
4304 return 0;
16c628de 4305
0be6e7d7 4306 memset(req, 0, sizeof(*req));
16c628de
MS
4307
4308 /*
4309 * Count # nexthops so we can decide whether to use singlepath
4310 * or multipath case.
4311 */
4312 nexthop_num = 0;
ee70f629
MS
4313 head = dplane_ctx_get_nhlfe_list(ctx);
4314 frr_each(nhlfe_list_const, head, nhlfe) {
16c628de
MS
4315 nexthop = nhlfe->nexthop;
4316 if (!nexthop)
4317 continue;
4318 if (cmd == RTM_NEWROUTE) {
4319 /* Count all selected NHLFEs */
4320 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4321 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
4322 nexthop_num++;
4323 } else { /* DEL */
4324 /* Count all installed NHLFEs */
4325 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
4326 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
4327 nexthop_num++;
4328 }
4329 }
4330
4331 if ((nexthop_num == 0) ||
4332 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
4333 return 0;
4334
0be6e7d7
JU
4335 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
4336 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
4337 req->n.nlmsg_type = cmd;
d4000d7b 4338 req->n.nlmsg_pid = nl->snl.nl_pid;
16c628de 4339
0be6e7d7
JU
4340 req->r.rtm_family = AF_MPLS;
4341 req->r.rtm_table = RT_TABLE_MAIN;
4342 req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
4343 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
4344 req->r.rtm_type = RTN_UNICAST;
16c628de
MS
4345
4346 if (cmd == RTM_NEWROUTE) {
4347 /* We do a replace to handle update. */
0be6e7d7 4348 req->n.nlmsg_flags |= NLM_F_REPLACE;
16c628de
MS
4349
4350 /* set the protocol value if installing */
4351 route_type = re_type_from_lsp_type(
4352 dplane_ctx_get_best_nhlfe(ctx)->type);
0be6e7d7 4353 req->r.rtm_protocol = zebra2proto(route_type);
16c628de
MS
4354 }
4355
4356 /* Fill destination */
4357 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
0be6e7d7
JU
4358 if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t)))
4359 return 0;
16c628de
MS
4360
4361 /* Fill nexthops (paths) based on single-path or multipath. The paths
4362 * chosen depend on the operation.
4363 */
fc608372 4364 if (nexthop_num == 1) {
16c628de
MS
4365 routedesc = "single-path";
4366 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4367 routedesc);
4368
4369 nexthop_num = 0;
ee70f629 4370 frr_each(nhlfe_list_const, head, nhlfe) {
16c628de
MS
4371 nexthop = nhlfe->nexthop;
4372 if (!nexthop)
4373 continue;
4374
4375 if ((cmd == RTM_NEWROUTE
4376 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4377 && CHECK_FLAG(nexthop->flags,
4378 NEXTHOP_FLAG_ACTIVE)))
4379 || (cmd == RTM_DELROUTE
4380 && (CHECK_FLAG(nhlfe->flags,
4381 NHLFE_FLAG_INSTALLED)
4382 && CHECK_FLAG(nexthop->flags,
4383 NEXTHOP_FLAG_FIB)))) {
4384 /* Add the gateway */
0be6e7d7
JU
4385 if (!_netlink_mpls_build_singlepath(
4386 &p, routedesc, nhlfe, &req->n,
4387 &req->r, buflen, cmd))
4388 return false;
16c628de
MS
4389
4390 nexthop_num++;
4391 break;
4392 }
4393 }
4394 } else { /* Multipath case */
312a6bee 4395 struct rtattr *nest;
81793ac1 4396 const union g_addr *src1 = NULL;
16c628de 4397
0be6e7d7
JU
4398 nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH);
4399 if (!nest)
4400 return 0;
16c628de
MS
4401
4402 routedesc = "multipath";
4403 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4404 routedesc);
4405
4406 nexthop_num = 0;
ee70f629 4407 frr_each(nhlfe_list_const, head, nhlfe) {
16c628de
MS
4408 nexthop = nhlfe->nexthop;
4409 if (!nexthop)
4410 continue;
4411
16c628de
MS
4412 if ((cmd == RTM_NEWROUTE
4413 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4414 && CHECK_FLAG(nexthop->flags,
4415 NEXTHOP_FLAG_ACTIVE)))
4416 || (cmd == RTM_DELROUTE
4417 && (CHECK_FLAG(nhlfe->flags,
4418 NHLFE_FLAG_INSTALLED)
4419 && CHECK_FLAG(nexthop->flags,
4420 NEXTHOP_FLAG_FIB)))) {
4421 nexthop_num++;
4422
4423 /* Build the multipath */
0be6e7d7
JU
4424 if (!_netlink_mpls_build_multipath(
4425 &p, routedesc, nhlfe, &req->n,
4426 buflen, &req->r, &src1))
4427 return 0;
16c628de
MS
4428 }
4429 }
4430
4431 /* Add the multipath */
0be6e7d7 4432 nl_attr_nest_end(&req->n, nest);
16c628de
MS
4433 }
4434
0be6e7d7 4435 return NLMSG_ALIGN(req->n.nlmsg_len);
16c628de 4436}
506efd37
AK
4437
4438/****************************************************************************
4439* This code was developed in a branch that didn't have dplane APIs for
4440* MAC updates. Hence the use of the legacy style. It will be moved to
4441* the new dplane style pre-merge to master. XXX
4442*/
4443static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip)
4444{
4445 struct {
4446 struct nlmsghdr n;
4447 struct nhmsg nhm;
4448 char buf[256];
4449 } req;
4450 int cmd = RTM_NEWNEXTHOP;
4451 struct zebra_vrf *zvrf;
4452 struct zebra_ns *zns;
4453
4454 zvrf = zebra_vrf_get_evpn();
4455 if (!zvrf)
4456 return -1;
4457 zns = zvrf->zns;
4458
4459 memset(&req, 0, sizeof(req));
4460
4461 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4462 req.n.nlmsg_flags = NLM_F_REQUEST;
4463 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4464 req.n.nlmsg_type = cmd;
4465 req.nhm.nh_family = AF_INET;
4466
4467 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4468 return -1;
4469 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4470 return -1;
4471 if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY,
4472 &vtep_ip, IPV4_MAX_BYTELEN))
4473 return -1;
4474
4475 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
9bcef951
MS
4476 zlog_debug("Tx %s fdb-nh 0x%x %pI4",
4477 nl_msg_type_to_str(cmd), nh_id, &vtep_ip);
506efd37
AK
4478 }
4479
4480 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
9bfadae8 4481 false);
506efd37
AK
4482}
4483
4484static int netlink_fdb_nh_del(uint32_t nh_id)
4485{
4486 struct {
4487 struct nlmsghdr n;
4488 struct nhmsg nhm;
4489 char buf[256];
4490 } req;
4491 int cmd = RTM_DELNEXTHOP;
4492 struct zebra_vrf *zvrf;
4493 struct zebra_ns *zns;
4494
4495 zvrf = zebra_vrf_get_evpn();
4496 if (!zvrf)
4497 return -1;
4498 zns = zvrf->zns;
4499
4500 memset(&req, 0, sizeof(req));
4501
4502 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4503 req.n.nlmsg_flags = NLM_F_REQUEST;
4504 req.n.nlmsg_type = cmd;
4505 req.nhm.nh_family = AF_UNSPEC;
4506
4507 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4508 return -1;
4509
4510 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4511 zlog_debug("Tx %s fdb-nh 0x%x",
4512 nl_msg_type_to_str(cmd), nh_id);
4513 }
4514
4515 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
9bfadae8 4516 false);
506efd37
AK
4517}
4518
4519static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt,
4520 struct nh_grp *nh_ids)
4521{
4522 struct {
4523 struct nlmsghdr n;
4524 struct nhmsg nhm;
4525 char buf[256];
4526 } req;
4527 int cmd = RTM_NEWNEXTHOP;
4528 struct zebra_vrf *zvrf;
4529 struct zebra_ns *zns;
4530 struct nexthop_grp grp[nh_cnt];
4531 uint32_t i;
4532
4533 zvrf = zebra_vrf_get_evpn();
4534 if (!zvrf)
4535 return -1;
4536 zns = zvrf->zns;
4537
4538 memset(&req, 0, sizeof(req));
4539
4540 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4541 req.n.nlmsg_flags = NLM_F_REQUEST;
4542 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4543 req.n.nlmsg_type = cmd;
4544 req.nhm.nh_family = AF_UNSPEC;
4545
4546 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id))
4547 return -1;
4548 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4549 return -1;
4550 memset(&grp, 0, sizeof(grp));
4551 for (i = 0; i < nh_cnt; ++i) {
4552 grp[i].id = nh_ids[i].id;
4553 grp[i].weight = nh_ids[i].weight;
4554 }
4555 if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP,
4556 grp, nh_cnt * sizeof(struct nexthop_grp)))
4557 return -1;
4558
4559
4560 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4561 char vtep_str[ES_VTEP_LIST_STR_SZ];
9e0c2fd1 4562 char nh_buf[16];
506efd37
AK
4563
4564 vtep_str[0] = '\0';
4565 for (i = 0; i < nh_cnt; ++i) {
9e0c2fd1 4566 snprintf(nh_buf, sizeof(nh_buf), "%u ",
506efd37 4567 grp[i].id);
9e0c2fd1 4568 strlcat(vtep_str, nh_buf, sizeof(vtep_str));
506efd37
AK
4569 }
4570
4571 zlog_debug("Tx %s fdb-nhg 0x%x %s",
4572 nl_msg_type_to_str(cmd), nhg_id, vtep_str);
4573 }
4574
4575 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
9bfadae8 4576 false);
506efd37
AK
4577}
4578
/*
 * Delete an FDB nexthop group. A group is removed by id through the same
 * routine that removes a single FDB nexthop.
 */
static int netlink_fdb_nhg_del(uint32_t nhg_id)
{
	int rc = netlink_fdb_nh_del(nhg_id);

	return rc;
}
4583
/* Install or update the FDB nexthop 'nh_id' pointing at 'vtep_ip'. */
int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
{
	int rc = netlink_fdb_nh_update(nh_id, vtep_ip);

	return rc;
}
4588
/* Remove the FDB nexthop 'nh_id' from the kernel. */
int kernel_del_mac_nh(uint32_t nh_id)
{
	int rc = netlink_fdb_nh_del(nh_id);

	return rc;
}
4593
/*
 * Install or update the FDB nexthop group 'nhg_id' with the 'nh_cnt'
 * members listed in 'nh_ids'.
 */
int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
		       struct nh_grp *nh_ids)
{
	int rc = netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids);

	return rc;
}
4599
/* Remove the FDB nexthop group 'nhg_id' from the kernel. */
int kernel_del_mac_nhg(uint32_t nhg_id)
{
	int rc = netlink_fdb_nhg_del(nhg_id);

	return rc;
}
4604
ddfeb486 4605#endif /* HAVE_NETLINK */