]> git.proxmox.com Git - mirror_frr.git/blame - zebra/rt_netlink.c
zebra: support for MAC-IP sync routes
[mirror_frr.git] / zebra / rt_netlink.c
CommitLineData
718e3744 1/* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21#include <zebra.h>
ddfeb486
DL
22
23#ifdef HAVE_NETLINK
24
8ccc7e80 25#include <net/if_arp.h>
f188e68e 26#include <linux/if_bridge.h>
ba777396
RW
27#include <linux/lwtunnel.h>
28#include <linux/mpls_iptunnel.h>
29#include <linux/neighbour.h>
30#include <linux/rtnetlink.h>
d9f5b2f5 31#include <linux/nexthop.h>
718e3744 32
33/* Hack for GNU libc version 2. */
34#ifndef MSG_TRUNC
35#define MSG_TRUNC 0x20
36#endif /* MSG_TRUNC */
37
38#include "linklist.h"
39#include "if.h"
40#include "log.h"
41#include "prefix.h"
42#include "connected.h"
43#include "table.h"
26e2ae36 44#include "memory.h"
4a1ab8e4 45#include "zebra_memory.h"
718e3744 46#include "rib.h"
e04ab74d 47#include "thread.h"
edd7c245 48#include "privs.h"
fb018d25 49#include "nexthop.h"
78104b9b 50#include "vrf.h"
5e6a74d8 51#include "vty.h"
40c7bdb0 52#include "mpls.h"
13d60d35 53#include "vxlan.h"
8d03bc50 54#include "printfrr.h"
718e3744 55
bf094f69 56#include "zebra/zapi_msg.h"
fe18ee2d 57#include "zebra/zebra_ns.h"
7c551956 58#include "zebra/zebra_vrf.h"
6621ca86 59#include "zebra/rt.h"
718e3744 60#include "zebra/redistribute.h"
61#include "zebra/interface.h"
62#include "zebra/debug.h"
12f6fb97 63#include "zebra/rtadv.h"
567b877d 64#include "zebra/zebra_ptm.h"
40c7bdb0 65#include "zebra/zebra_mpls.h"
1fdc9eae 66#include "zebra/kernel_netlink.h"
67#include "zebra/rt_netlink.h"
d9f5b2f5 68#include "zebra/zebra_nhg.h"
e3be0432 69#include "zebra/zebra_mroute.h"
2232a77c 70#include "zebra/zebra_vxlan.h"
364fed6b 71#include "zebra/zebra_errors.h"
506efd37 72#include "zebra/zebra_evpn_mh.h"
e3be0432 73
40c7bdb0 74#ifndef AF_MPLS
75#define AF_MPLS 28
76#endif
77
2232a77c 78static vlanid_t filter_vlan = 0;
79
7c99d51b
MS
80/* We capture whether the current kernel supports nexthop ids; by
81 * default, we'll use them if possible. There's also a configuration
82 * available to _disable_ use of kernel nexthops.
83 */
fec211ad 84static bool supports_nh;
81505946 85
d62a17ae 86struct gw_family_t {
d7c0a89a
QY
87 uint16_t filler;
88 uint16_t family;
d62a17ae 89 union g_addr gate;
40c7bdb0 90};
91
2b64873d
DL
92static const char ipv4_ll_buf[16] = "169.254.0.1";
93static struct in_addr ipv4_ll;
8755598a 94
002e5c43
SW
95/* Is this a ipv4 over ipv6 route? */
96static bool is_route_v4_over_v6(unsigned char rtm_family,
97 enum nexthop_types_t nexthop_type)
98{
99 if (rtm_family == AF_INET
100 && (nexthop_type == NEXTHOP_TYPE_IPV6
101 || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX))
102 return true;
103
104 return false;
105}
106
7c99d51b
MS
107/* Helper to control use of kernel-level nexthop ids */
108static bool kernel_nexthops_supported(void)
109{
d982012a
SW
110 return (supports_nh && !vrf_is_backend_netns()
111 && zebra_nhg_kernel_nexthops_enabled());
7c99d51b
MS
112}
113
8755598a
DS
114/*
115 * The ipv4_ll data structure is used for all 5549
116 * additions to the kernel. Let's figure out the
117 * correct value one time instead for every
118 * install/remove of a 5549 type route
119 */
d62a17ae 120void rt_netlink_init(void)
8755598a 121{
d62a17ae 122 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
8755598a
DS
123}
124
931fa60c
MS
125/*
126 * Mapping from dataplane neighbor flags to netlink flags
127 */
128static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
129{
130 uint8_t flags = 0;
131
132 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
133 flags |= NTF_EXT_LEARNED;
134 if (dplane_flags & DPLANE_NTF_ROUTER)
135 flags |= NTF_ROUTER;
136
137 return flags;
138}
139
140/*
141 * Mapping from dataplane neighbor state to netlink state
142 */
143static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
144{
145 uint16_t state = 0;
146
147 if (dplane_state & DPLANE_NUD_REACHABLE)
148 state |= NUD_REACHABLE;
149 if (dplane_state & DPLANE_NUD_STALE)
150 state |= NUD_STALE;
151 if (dplane_state & DPLANE_NUD_NOARP)
152 state |= NUD_NOARP;
153 if (dplane_state & DPLANE_NUD_PROBE)
154 state |= NUD_PROBE;
155
156 return state;
157}
158
159
6a6d11a3 160static inline bool is_selfroute(int proto)
23b1f334 161{
d62a17ae 162 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
d4d71f11 163 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
d62a17ae 164 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
165 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
915902cb 166 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
0761368a 167 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
da82f6b4 168 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)) {
6a6d11a3 169 return true;
d62a17ae 170 }
171
6a6d11a3 172 return false;
23b1f334
DD
173}
174
915902cb 175static inline int zebra2proto(int proto)
23b1f334 176{
d62a17ae 177 switch (proto) {
178 case ZEBRA_ROUTE_BABEL:
179 proto = RTPROT_BABEL;
180 break;
181 case ZEBRA_ROUTE_BGP:
182 proto = RTPROT_BGP;
183 break;
184 case ZEBRA_ROUTE_OSPF:
185 case ZEBRA_ROUTE_OSPF6:
186 proto = RTPROT_OSPF;
187 break;
188 case ZEBRA_ROUTE_STATIC:
d4d71f11 189 proto = RTPROT_ZSTATIC;
d62a17ae 190 break;
191 case ZEBRA_ROUTE_ISIS:
192 proto = RTPROT_ISIS;
193 break;
194 case ZEBRA_ROUTE_RIP:
195 proto = RTPROT_RIP;
196 break;
197 case ZEBRA_ROUTE_RIPNG:
198 proto = RTPROT_RIPNG;
199 break;
200 case ZEBRA_ROUTE_NHRP:
201 proto = RTPROT_NHRP;
202 break;
203 case ZEBRA_ROUTE_EIGRP:
204 proto = RTPROT_EIGRP;
205 break;
206 case ZEBRA_ROUTE_LDP:
207 proto = RTPROT_LDP;
208 break;
8a71d93d
DS
209 case ZEBRA_ROUTE_SHARP:
210 proto = RTPROT_SHARP;
211 break;
0761368a
DS
212 case ZEBRA_ROUTE_PBR:
213 proto = RTPROT_PBR;
214 break;
da82f6b4
CF
215 case ZEBRA_ROUTE_OPENFABRIC:
216 proto = RTPROT_OPENFABRIC;
217 break;
a56ec5c0 218 case ZEBRA_ROUTE_TABLE:
38e40db1 219 case ZEBRA_ROUTE_NHG:
a56ec5c0
DS
220 proto = RTPROT_ZEBRA;
221 break;
d62a17ae 222 default:
0761368a
DS
223 /*
224 * When a user adds a new protocol this will show up
225 * to let them know to do something about it. This
226 * is intentionally a warn because we should see
227 * this as part of development of a new protocol
228 */
9df414fe
QY
229 zlog_debug(
230 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
15569c58 231 __func__, proto);
d62a17ae 232 proto = RTPROT_ZEBRA;
233 break;
234 }
235
236 return proto;
23b1f334
DD
237}
238
38e40db1 239static inline int proto2zebra(int proto, int family, bool is_nexthop)
915902cb
DS
240{
241 switch (proto) {
242 case RTPROT_BABEL:
243 proto = ZEBRA_ROUTE_BABEL;
244 break;
245 case RTPROT_BGP:
246 proto = ZEBRA_ROUTE_BGP;
247 break;
248 case RTPROT_OSPF:
996c9314
LB
249 proto = (family == AFI_IP) ? ZEBRA_ROUTE_OSPF
250 : ZEBRA_ROUTE_OSPF6;
915902cb
DS
251 break;
252 case RTPROT_ISIS:
253 proto = ZEBRA_ROUTE_ISIS;
254 break;
255 case RTPROT_RIP:
256 proto = ZEBRA_ROUTE_RIP;
257 break;
258 case RTPROT_RIPNG:
259 proto = ZEBRA_ROUTE_RIPNG;
260 break;
261 case RTPROT_NHRP:
262 proto = ZEBRA_ROUTE_NHRP;
263 break;
264 case RTPROT_EIGRP:
265 proto = ZEBRA_ROUTE_EIGRP;
266 break;
267 case RTPROT_LDP:
268 proto = ZEBRA_ROUTE_LDP;
269 break;
270 case RTPROT_STATIC:
d4d71f11 271 case RTPROT_ZSTATIC:
915902cb
DS
272 proto = ZEBRA_ROUTE_STATIC;
273 break;
0761368a
DS
274 case RTPROT_SHARP:
275 proto = ZEBRA_ROUTE_SHARP;
276 break;
277 case RTPROT_PBR:
278 proto = ZEBRA_ROUTE_PBR;
279 break;
da82f6b4
CF
280 case RTPROT_OPENFABRIC:
281 proto = ZEBRA_ROUTE_OPENFABRIC;
282 break;
38e40db1
SW
283 case RTPROT_ZEBRA:
284 if (is_nexthop) {
285 proto = ZEBRA_ROUTE_NHG;
286 break;
287 }
288 /* Intentional fall thru */
915902cb 289 default:
0761368a
DS
290 /*
291 * When a user adds a new protocol this will show up
292 * to let them know to do something about it. This
293 * is intentionally a warn because we should see
294 * this as part of development of a new protocol
295 */
9df414fe
QY
296 zlog_debug(
297 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
15569c58 298 __func__, proto);
915902cb
DS
299 proto = ZEBRA_ROUTE_KERNEL;
300 break;
301 }
302 return proto;
303}
304
12f6fb97
DS
305/*
306Pending: create an efficient table_id (in a tree/hash) based lookup)
307 */
9d866c07 308vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
12f6fb97 309{
d62a17ae 310 struct vrf *vrf;
311 struct zebra_vrf *zvrf;
12f6fb97 312
a2addae8 313 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
78dd30b2
PG
314 zvrf = vrf->info;
315 if (zvrf == NULL)
d62a17ae 316 continue;
78dd30b2
PG
317 /* case vrf with netns : match the netnsid */
318 if (vrf_is_backend_netns()) {
319 if (ns_id == zvrf_id(zvrf))
320 return zvrf_id(zvrf);
321 } else {
322 /* VRF is VRF_BACKEND_VRF_LITE */
323 if (zvrf->table_id != table_id)
324 continue;
325 return zvrf_id(zvrf);
326 }
d62a17ae 327 }
12f6fb97 328
d62a17ae 329 return VRF_DEFAULT;
12f6fb97
DS
330}
331
87da6a60
SW
332/**
333 * @parse_encap_mpls() - Parses encapsulated mpls attributes
334 * @tb: Pointer to rtattr to look for nested items in.
335 * @labels: Pointer to store labels in.
336 *
337 * Return: Number of mpls labels found.
338 */
339static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
340{
341 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
342 mpls_lse_t *lses = NULL;
343 int num_labels = 0;
344 uint32_t ttl = 0;
345 uint32_t bos = 0;
346 uint32_t exp = 0;
347 mpls_label_t label = 0;
348
349 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
350 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
351 while (!bos && num_labels < MPLS_MAX_LABELS) {
352 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
353 labels[num_labels++] = label;
354 }
355
356 return num_labels;
357}
358
77a44d94
SW
359static struct nexthop
360parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
361 enum blackhole_type bh_type, int index, void *prefsrc,
20822f9d 362 void *gate, afi_t afi, vrf_id_t vrf_id)
77a44d94
SW
363{
364 struct interface *ifp = NULL;
365 struct nexthop nh = {0};
366 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
367 int num_labels = 0;
368
20822f9d 369 vrf_id_t nh_vrf_id = vrf_id;
77a44d94
SW
370 size_t sz = (afi == AFI_IP) ? 4 : 16;
371
372 if (bh_type == BLACKHOLE_UNSPEC) {
373 if (index && !gate)
374 nh.type = NEXTHOP_TYPE_IFINDEX;
375 else if (index && gate)
376 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
377 : NEXTHOP_TYPE_IPV6_IFINDEX;
378 else if (!index && gate)
379 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
380 : NEXTHOP_TYPE_IPV6;
381 else {
382 nh.type = NEXTHOP_TYPE_BLACKHOLE;
383 nh.bh_type = bh_type;
384 }
385 } else {
386 nh.type = NEXTHOP_TYPE_BLACKHOLE;
387 nh.bh_type = bh_type;
388 }
389 nh.ifindex = index;
390 if (prefsrc)
391 memcpy(&nh.src, prefsrc, sz);
392 if (gate)
393 memcpy(&nh.gate, gate, sz);
394
395 if (index) {
396 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
397 if (ifp)
398 nh_vrf_id = ifp->vrf_id;
399 }
400 nh.vrf_id = nh_vrf_id;
401
402 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
403 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
404 == LWTUNNEL_ENCAP_MPLS) {
405 num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
406 }
407
408 if (rtm->rtm_flags & RTNH_F_ONLINK)
409 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
410
411 if (num_labels)
412 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
413
414 return nh;
415}
416
20822f9d 417static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
0eb97b86 418 struct nexthop_group *ng,
20822f9d
SW
419 struct rtmsg *rtm,
420 struct rtnexthop *rtnh,
421 struct rtattr **tb,
422 void *prefsrc, vrf_id_t vrf_id)
423{
424 void *gate = NULL;
425 struct interface *ifp = NULL;
426 int index = 0;
427 /* MPLS labels */
428 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
429 int num_labels = 0;
430 struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
431
432 int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
433 vrf_id_t nh_vrf_id = vrf_id;
434
20822f9d
SW
435 for (;;) {
436 struct nexthop *nh = NULL;
437
438 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
439 break;
440
441 index = rtnh->rtnh_ifindex;
442 if (index) {
443 /*
444 * Yes we are looking this up
445 * for every nexthop and just
446 * using the last one looked
447 * up right now
448 */
449 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
450 index);
451 if (ifp)
452 nh_vrf_id = ifp->vrf_id;
453 else {
454 flog_warn(
455 EC_ZEBRA_UNKNOWN_INTERFACE,
456 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
15569c58 457 __func__, index);
20822f9d
SW
458 nh_vrf_id = VRF_DEFAULT;
459 }
460 } else
461 nh_vrf_id = vrf_id;
462
463 if (rtnh->rtnh_len > sizeof(*rtnh)) {
464 memset(rtnh_tb, 0, sizeof(rtnh_tb));
465
466 netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
467 rtnh->rtnh_len - sizeof(*rtnh));
468 if (rtnh_tb[RTA_GATEWAY])
469 gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
470 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
471 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
472 == LWTUNNEL_ENCAP_MPLS) {
473 num_labels = parse_encap_mpls(
474 rtnh_tb[RTA_ENCAP], labels);
475 }
476 }
477
f3354e16
SW
478 if (gate && rtm->rtm_family == AF_INET) {
479 if (index)
0eb97b86
MS
480 nh = nexthop_from_ipv4_ifindex(
481 gate, prefsrc, index, nh_vrf_id);
f3354e16 482 else
0eb97b86
MS
483 nh = nexthop_from_ipv4(gate, prefsrc,
484 nh_vrf_id);
f3354e16
SW
485 } else if (gate && rtm->rtm_family == AF_INET6) {
486 if (index)
0eb97b86
MS
487 nh = nexthop_from_ipv6_ifindex(
488 gate, index, nh_vrf_id);
f3354e16 489 else
0eb97b86 490 nh = nexthop_from_ipv6(gate, nh_vrf_id);
20822f9d 491 } else
0eb97b86 492 nh = nexthop_from_ifindex(index, nh_vrf_id);
20822f9d
SW
493
494 if (nh) {
df7fb580
DS
495 nh->weight = rtnh->rtnh_hops + 1;
496
20822f9d
SW
497 if (num_labels)
498 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
499 num_labels, labels);
500
501 if (rtnh->rtnh_flags & RTNH_F_ONLINK)
502 SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);
0eb97b86
MS
503
504 /* Add to temporary list */
505 nexthop_group_add_sorted(ng, nh);
20822f9d
SW
506 }
507
508 if (rtnh->rtnh_len == 0)
509 break;
510
511 len -= NLMSG_ALIGN(rtnh->rtnh_len);
512 rtnh = RTNH_NEXT(rtnh);
513 }
514
0eb97b86 515 uint8_t nhop_num = nexthop_group_nexthop_num(ng);
20822f9d
SW
516
517 return nhop_num;
518}
519
718e3744 520/* Looking up routing table by netlink interface. */
2414abd3 521static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
d62a17ae 522 int startup)
718e3744 523{
d62a17ae 524 int len;
525 struct rtmsg *rtm;
526 struct rtattr *tb[RTA_MAX + 1];
d7c0a89a 527 uint8_t flags = 0;
d62a17ae 528 struct prefix p;
792fa92e 529 struct prefix_ipv6 src_p = {};
78dd30b2 530 vrf_id_t vrf_id;
6a6d11a3 531 bool selfroute;
d62a17ae 532
533 char anyaddr[16] = {0};
534
915902cb 535 int proto = ZEBRA_ROUTE_KERNEL;
d62a17ae 536 int index = 0;
537 int table;
538 int metric = 0;
d7c0a89a 539 uint32_t mtu = 0;
25715c7e 540 uint8_t distance = 0;
4e40b6d6 541 route_tag_t tag = 0;
fcc89a9c 542 uint32_t nhe_id = 0;
d62a17ae 543
544 void *dest = NULL;
545 void *gate = NULL;
546 void *prefsrc = NULL; /* IPv4 preferred source host address */
547 void *src = NULL; /* IPv6 srcdest source prefix */
e655a03c 548 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
d62a17ae 549
550 rtm = NLMSG_DATA(h);
551
552 if (startup && h->nlmsg_type != RTM_NEWROUTE)
553 return 0;
e655a03c
DL
554 switch (rtm->rtm_type) {
555 case RTN_UNICAST:
556 break;
557 case RTN_BLACKHOLE:
558 bh_type = BLACKHOLE_NULL;
559 break;
560 case RTN_UNREACHABLE:
561 bh_type = BLACKHOLE_REJECT;
562 break;
563 case RTN_PROHIBIT:
564 bh_type = BLACKHOLE_ADMINPROHIB;
565 break;
566 default:
8c8f250b
DS
567 if (IS_ZEBRA_DEBUG_KERNEL)
568 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
569 nl_rttype_to_str(rtm->rtm_type),
570 rtm->rtm_type);
d62a17ae 571 return 0;
e655a03c 572 }
d62a17ae 573
574 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618 575 if (len < 0) {
15569c58
DA
576 zlog_err(
577 "%s: Message received from netlink is of a broken size %d %zu",
578 __func__, h->nlmsg_len,
579 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 580 return -1;
9bdf8618 581 }
d62a17ae 582
0d6f7fd6 583 memset(tb, 0, sizeof(tb));
d62a17ae 584 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
585
586 if (rtm->rtm_flags & RTM_F_CLONED)
587 return 0;
588 if (rtm->rtm_protocol == RTPROT_REDIRECT)
589 return 0;
590 if (rtm->rtm_protocol == RTPROT_KERNEL)
591 return 0;
592
6a6d11a3
NN
593 selfroute = is_selfroute(rtm->rtm_protocol);
594
595 if (!startup && selfroute && h->nlmsg_type == RTM_NEWROUTE) {
6ab5222f
DS
596 if (IS_ZEBRA_DEBUG_KERNEL)
597 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
598 rtm->rtm_protocol);
d62a17ae 599 return 0;
6ab5222f 600 }
d62a17ae 601
602 /* We don't care about change notifications for the MPLS table. */
603 /* TODO: Revisit this. */
604 if (rtm->rtm_family == AF_MPLS)
605 return 0;
606
607 /* Table corresponding to route. */
608 if (tb[RTA_TABLE])
609 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
610 else
611 table = rtm->rtm_table;
612
613 /* Map to VRF */
78dd30b2 614 vrf_id = vrf_lookup_by_table(table, ns_id);
d62a17ae 615 if (vrf_id == VRF_DEFAULT) {
616 if (!is_zebra_valid_kernel_table(table)
617 && !is_zebra_main_routing_table(table))
618 return 0;
619 }
620
621 /* Route which inserted by Zebra. */
6a6d11a3 622 if (selfroute) {
d62a17ae 623 flags |= ZEBRA_FLAG_SELFROUTE;
38e40db1 624 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
915902cb 625 }
d62a17ae 626 if (tb[RTA_OIF])
627 index = *(int *)RTA_DATA(tb[RTA_OIF]);
628
629 if (tb[RTA_DST])
630 dest = RTA_DATA(tb[RTA_DST]);
631 else
632 dest = anyaddr;
633
634 if (tb[RTA_SRC])
635 src = RTA_DATA(tb[RTA_SRC]);
636 else
637 src = anyaddr;
638
639 if (tb[RTA_PREFSRC])
640 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
641
642 if (tb[RTA_GATEWAY])
643 gate = RTA_DATA(tb[RTA_GATEWAY]);
644
fcc89a9c
SW
645 if (tb[RTA_NH_ID])
646 nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
647
f19435a8
DS
648 if (tb[RTA_PRIORITY])
649 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
d62a17ae 650
4e40b6d6
KK
651#if defined(SUPPORT_REALMS)
652 if (tb[RTA_FLOW])
653 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
654#endif
655
f19435a8
DS
656 if (tb[RTA_METRICS]) {
657 struct rtattr *mxrta[RTAX_MAX + 1];
d62a17ae 658
0d6f7fd6 659 memset(mxrta, 0, sizeof(mxrta));
996c9314 660 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
f19435a8 661 RTA_PAYLOAD(tb[RTA_METRICS]));
d62a17ae 662
f19435a8 663 if (mxrta[RTAX_MTU])
d7c0a89a 664 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
d62a17ae 665 }
666
667 if (rtm->rtm_family == AF_INET) {
668 p.family = AF_INET;
930571d2 669 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
e17d9b2d 670 zlog_err(
75829703 671 "Invalid destination prefix length: %u received from kernel route change",
930571d2 672 rtm->rtm_dst_len);
e17d9b2d 673 return -1;
930571d2 674 }
d62a17ae 675 memcpy(&p.u.prefix4, dest, 4);
676 p.prefixlen = rtm->rtm_dst_len;
677
1f610a1f
CF
678 if (rtm->rtm_src_len != 0) {
679 char buf[PREFIX_STRLEN];
9df414fe 680 flog_warn(
e914ccbe 681 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
9df414fe
QY
682 "unsupported IPv4 sourcedest route (dest %s vrf %u)",
683 prefix2str(&p, buf, sizeof(buf)), vrf_id);
1f610a1f
CF
684 return 0;
685 }
930571d2 686
1f610a1f
CF
687 /* Force debug below to not display anything for source */
688 src_p.prefixlen = 0;
d62a17ae 689 } else if (rtm->rtm_family == AF_INET6) {
690 p.family = AF_INET6;
930571d2 691 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
e17d9b2d 692 zlog_err(
75829703 693 "Invalid destination prefix length: %u received from kernel route change",
930571d2 694 rtm->rtm_dst_len);
e17d9b2d 695 return -1;
930571d2 696 }
d62a17ae 697 memcpy(&p.u.prefix6, dest, 16);
698 p.prefixlen = rtm->rtm_dst_len;
699
700 src_p.family = AF_INET6;
930571d2 701 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
e17d9b2d 702 zlog_err(
75829703 703 "Invalid source prefix length: %u received from kernel route change",
930571d2 704 rtm->rtm_src_len);
e17d9b2d 705 return -1;
930571d2 706 }
d62a17ae 707 memcpy(&src_p.prefix, src, 16);
708 src_p.prefixlen = rtm->rtm_src_len;
709 }
710
25715c7e
DS
711 /*
712 * For ZEBRA_ROUTE_KERNEL types:
713 *
714 * The metric/priority of the route received from the kernel
715 * is a 32 bit number. We are going to interpret the high
716 * order byte as the Admin Distance and the low order 3 bytes
717 * as the metric.
718 *
719 * This will allow us to do two things:
720 * 1) Allow the creation of kernel routes that can be
721 * overridden by zebra.
722 * 2) Allow the old behavior for 'most' kernel route types
723 * if a user enters 'ip route ...' v4 routes get a metric
724 * of 0 and v6 routes get a metric of 1024. Both of these
725 * values will end up with a admin distance of 0, which
726 * will cause them to win for the purposes of zebra.
727 */
728 if (proto == ZEBRA_ROUTE_KERNEL) {
729 distance = (metric >> 24) & 0xFF;
996c9314 730 metric = (metric & 0x00FFFFFF);
25715c7e
DS
731 }
732
d62a17ae 733 if (IS_ZEBRA_DEBUG_KERNEL) {
734 char buf[PREFIX_STRLEN];
735 char buf2[PREFIX_STRLEN];
bd47f3a3
JU
736 zlog_debug(
737 "%s %s%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d",
738 nl_msg_type_to_str(h->nlmsg_type),
739 prefix2str(&p, buf, sizeof(buf)),
740 src_p.prefixlen ? " from " : "",
741 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
742 : "",
743 vrf_id_to_name(vrf_id), vrf_id, table, metric,
744 distance);
d62a17ae 745 }
746
747 afi_t afi = AFI_IP;
748 if (rtm->rtm_family == AF_INET6)
749 afi = AFI_IP6;
750
751 if (h->nlmsg_type == RTM_NEWROUTE) {
8795f904 752
fd36be7e 753 if (!tb[RTA_MULTIPATH]) {
77a44d94 754 struct nexthop nh = {0};
8795f904 755
77a44d94
SW
756 if (!nhe_id) {
757 nh = parse_nexthop_unicast(
758 ns_id, rtm, tb, bh_type, index, prefsrc,
20822f9d 759 gate, afi, vrf_id);
87da6a60 760 }
4a7371e9 761 rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
8032b717
SW
762 &src_p, &nh, nhe_id, table, metric, mtu,
763 distance, tag);
fd36be7e 764 } else {
d62a17ae 765 /* This is a multipath route */
d62a17ae 766 struct route_entry *re;
0eb97b86 767 struct nexthop_group *ng = NULL;
d62a17ae 768 struct rtnexthop *rtnh =
769 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
d62a17ae 770
771 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
915902cb 772 re->type = proto;
25715c7e 773 re->distance = distance;
d62a17ae 774 re->flags = flags;
775 re->metric = metric;
776 re->mtu = mtu;
777 re->vrf_id = vrf_id;
778 re->table = table;
98572489 779 re->uptime = monotime(NULL);
4e40b6d6 780 re->tag = tag;
bbb322f2 781 re->nhe_id = nhe_id;
3c04071d 782
20822f9d 783 if (!nhe_id) {
0eb97b86
MS
784 uint8_t nhop_num;
785
786 /* Use temporary list of nexthops; parse
787 * message payload's nexthops.
788 */
789 ng = nexthop_group_new();
790 nhop_num =
20822f9d 791 parse_multipath_nexthops_unicast(
0eb97b86 792 ns_id, ng, rtm, rtnh, tb,
20822f9d
SW
793 prefsrc, vrf_id);
794
795 zserv_nexthop_num_warn(
796 __func__, (const struct prefix *)&p,
797 nhop_num);
0eb97b86
MS
798
799 if (nhop_num == 0) {
800 nexthop_group_delete(&ng);
801 ng = NULL;
802 }
d62a17ae 803 }
804
0eb97b86 805 if (nhe_id || ng)
1f610a1f 806 rib_add_multipath(afi, SAFI_UNICAST, &p,
0eb97b86 807 &src_p, re, ng);
20822f9d
SW
808 else
809 XFREE(MTYPE_RE, re);
d62a17ae 810 }
811 } else {
bc541126
SW
812 if (nhe_id) {
813 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
814 &p, &src_p, NULL, nhe_id, table, metric,
815 distance, true);
816 } else {
817 if (!tb[RTA_MULTIPATH]) {
818 struct nexthop nh;
760f39dc
HS
819
820 nh = parse_nexthop_unicast(
821 ns_id, rtm, tb, bh_type, index, prefsrc,
822 gate, afi, vrf_id);
bc541126
SW
823 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
824 flags, &p, &src_p, &nh, 0, table,
825 metric, distance, true);
8ba5bd58 826 } else {
bc541126
SW
827 /* XXX: need to compare the entire list of
828 * nexthops here for NLM_F_APPEND stupidity */
829 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
830 flags, &p, &src_p, NULL, 0, table,
831 metric, distance, true);
8ba5bd58 832 }
d62a17ae 833 }
834 }
835
836 return 0;
718e3744 837}
838
e3be0432
DS
839static struct mcast_route_data *mroute = NULL;
840
2414abd3 841static int netlink_route_change_read_multicast(struct nlmsghdr *h,
d62a17ae 842 ns_id_t ns_id, int startup)
565fdc75 843{
d62a17ae 844 int len;
845 struct rtmsg *rtm;
846 struct rtattr *tb[RTA_MAX + 1];
847 struct mcast_route_data *m;
848 struct mcast_route_data mr;
849 int iif = 0;
850 int count;
851 int oif[256];
852 int oif_count = 0;
853 char sbuf[40];
854 char gbuf[40];
855 char oif_list[256] = "\0";
78dd30b2 856 vrf_id_t vrf;
43b5cc5e 857 int table;
d62a17ae 858
859 if (mroute)
860 m = mroute;
861 else {
862 memset(&mr, 0, sizeof(mr));
863 m = &mr;
864 }
865
866 rtm = NLMSG_DATA(h);
867
868 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
869
0d6f7fd6 870 memset(tb, 0, sizeof(tb));
d62a17ae 871 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
90d82769 872
43b5cc5e
DS
873 if (tb[RTA_TABLE])
874 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
875 else
876 table = rtm->rtm_table;
877
78dd30b2 878 vrf = vrf_lookup_by_table(table, ns_id);
43b5cc5e 879
d62a17ae 880 if (tb[RTA_IIF])
881 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
882
883 if (tb[RTA_SRC])
bd8b9272 884 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
d62a17ae 885
886 if (tb[RTA_DST])
bd8b9272 887 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
d62a17ae 888
62819462 889 if (tb[RTA_EXPIRES])
d62a17ae 890 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
891
892 if (tb[RTA_MULTIPATH]) {
893 struct rtnexthop *rtnh =
894 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
895
896 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
897 for (;;) {
898 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
899 break;
900
901 oif[oif_count] = rtnh->rtnh_ifindex;
902 oif_count++;
903
3c04071d
SW
904 if (rtnh->rtnh_len == 0)
905 break;
906
d62a17ae 907 len -= NLMSG_ALIGN(rtnh->rtnh_len);
908 rtnh = RTNH_NEXT(rtnh);
909 }
910 }
911
912 if (IS_ZEBRA_DEBUG_KERNEL) {
822c9af2
SW
913 struct interface *ifp = NULL;
914 struct zebra_vrf *zvrf = NULL;
915
0af35d90
RW
916 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
917 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
d62a17ae 918 for (count = 0; count < oif_count; count++) {
919 ifp = if_lookup_by_index(oif[count], vrf);
920 char temp[256];
921
772270f3
QY
922 snprintf(temp, sizeof(temp), "%s(%d) ",
923 ifp ? ifp->name : "Unknown", oif[count]);
eab4a5c2 924 strlcat(oif_list, temp, sizeof(oif_list));
d62a17ae 925 }
822c9af2 926 zvrf = zebra_vrf_lookup_by_id(vrf);
d62a17ae 927 ifp = if_lookup_by_index(iif, vrf);
822c9af2
SW
928 zlog_debug(
929 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s(%d) OIF: %s jiffies: %lld",
bd47f3a3
JU
930 zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type),
931 sbuf, gbuf, ifp ? ifp->name : "Unknown", iif, oif_list,
822c9af2 932 m->lastused);
90d82769 933 }
d62a17ae 934 return 0;
565fdc75
DS
935}
936
2414abd3 937int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
565fdc75 938{
d62a17ae 939 int len;
d62a17ae 940 struct rtmsg *rtm;
941
942 rtm = NLMSG_DATA(h);
943
944 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
945 /* If this is not route add/delete message print warning. */
9165c5f5 946 zlog_debug("Kernel message: %s NS %u",
87b5d1b0 947 nl_msg_type_to_str(h->nlmsg_type), ns_id);
d62a17ae 948 return 0;
949 }
950
c25e2f1a
DS
951 if (!(rtm->rtm_family == AF_INET ||
952 rtm->rtm_family == AF_INET6 ||
953 rtm->rtm_family == RTNL_FAMILY_IPMR )) {
9df414fe 954 flog_warn(
e914ccbe 955 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
956 "Invalid address family: %u received from kernel route change: %s",
957 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
958 return 0;
959 }
960
d62a17ae 961 /* Connected route. */
962 if (IS_ZEBRA_DEBUG_KERNEL)
78dd30b2 963 zlog_debug("%s %s %s proto %s NS %u",
d62a17ae 964 nl_msg_type_to_str(h->nlmsg_type),
965 nl_family_to_str(rtm->rtm_family),
966 nl_rttype_to_str(rtm->rtm_type),
78dd30b2 967 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
d62a17ae 968
d62a17ae 969
970 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618 971 if (len < 0) {
15569c58
DA
972 zlog_err(
973 "%s: Message received from netlink is of a broken size: %d %zu",
974 __func__, h->nlmsg_len,
975 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 976 return -1;
9bdf8618 977 }
d62a17ae 978
e655a03c 979 if (rtm->rtm_type == RTN_MULTICAST)
2414abd3 980 netlink_route_change_read_multicast(h, ns_id, startup);
e655a03c 981 else
2414abd3 982 netlink_route_change_read_unicast(h, ns_id, startup);
d62a17ae 983 return 0;
565fdc75
DS
984}
985
289602d7 986/* Request for specific route information from the kernel */
d62a17ae 987static int netlink_request_route(struct zebra_ns *zns, int family, int type)
289602d7 988{
d62a17ae 989 struct {
990 struct nlmsghdr n;
991 struct rtmsg rtm;
992 } req;
993
994 /* Form the request, specifying filter (rtattr) if needed. */
995 memset(&req, 0, sizeof(req));
996 req.n.nlmsg_type = type;
718f9b0f 997 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 998 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
999 req.rtm.rtm_family = family;
1000
fd3f8e52 1001 return netlink_request(&zns->netlink_cmd, &req);
289602d7 1002}
1003
718e3744 1004/* Routing table read function using netlink interface. Only called
1005 bootstrap time. */
d62a17ae 1006int netlink_route_read(struct zebra_ns *zns)
718e3744 1007{
d62a17ae 1008 int ret;
85a75f1e
MS
1009 struct zebra_dplane_info dp_info;
1010
1011 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 1012
1013 /* Get IPv4 routing table. */
1014 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
1015 if (ret < 0)
1016 return ret;
1017 ret = netlink_parse_info(netlink_route_change_read_unicast,
85a75f1e 1018 &zns->netlink_cmd, &dp_info, 0, 1);
d62a17ae 1019 if (ret < 0)
1020 return ret;
1021
1022 /* Get IPv6 routing table. */
1023 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
1024 if (ret < 0)
1025 return ret;
1026 ret = netlink_parse_info(netlink_route_change_read_unicast,
85a75f1e 1027 &zns->netlink_cmd, &dp_info, 0, 1);
d62a17ae 1028 if (ret < 0)
1029 return ret;
1030
1031 return 0;
718e3744 1032}
1033
0be6e7d7
JU
1034/*
1035 * The function returns true if the gateway info could be added
1036 * to the message, otherwise false is returned.
1037 */
1038static bool _netlink_route_add_gateway_info(uint8_t route_family,
312a6bee
JU
1039 uint8_t gw_family,
1040 struct nlmsghdr *nlmsg,
1041 size_t req_size, int bytelen,
1042 const struct nexthop *nexthop)
40c7bdb0 1043{
d62a17ae 1044 if (route_family == AF_MPLS) {
1045 struct gw_family_t gw_fam;
1046
1047 gw_fam.family = gw_family;
1048 if (gw_family == AF_INET)
1049 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1050 else
1051 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
0be6e7d7
JU
1052 if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family,
1053 bytelen + 2))
1054 return false;
d62a17ae 1055 } else {
92d6f769
K
1056 if (!(nexthop->rparent
1057 && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) {
1058 if (gw_family == AF_INET) {
1059 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1060 &nexthop->gate.ipv4, bytelen))
1061 return false;
1062 } else {
1063 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1064 &nexthop->gate.ipv6, bytelen))
1065 return false;
1066 }
0be6e7d7 1067 }
d62a17ae 1068 }
0be6e7d7
JU
1069
1070 return true;
40c7bdb0 1071}
1072
b7537db6
SW
1073static int build_label_stack(struct mpls_label_stack *nh_label,
1074 mpls_lse_t *out_lse, char *label_buf,
1075 size_t label_buf_size)
1076{
1077 char label_buf1[20];
1078 int num_labels = 0;
1079
1080 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1081 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1082 continue;
1083
1084 if (IS_ZEBRA_DEBUG_KERNEL) {
1085 if (!num_labels)
1086 sprintf(label_buf, "label %u",
1087 nh_label->label[i]);
1088 else {
772270f3
QY
1089 snprintf(label_buf1, sizeof(label_buf1), "/%u",
1090 nh_label->label[i]);
b7537db6
SW
1091 strlcat(label_buf, label_buf1, label_buf_size);
1092 }
1093 }
1094
1095 out_lse[num_labels] =
1096 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1097 num_labels++;
1098 }
1099
1100 return num_labels;
1101}
1102
a757997c
JU
1103static bool _netlink_route_encode_label_info(struct mpls_label_stack *nh_label,
1104 struct nlmsghdr *nlmsg,
1105 size_t buflen, struct rtmsg *rtmsg,
1106 char *label_buf,
1107 size_t label_buf_size)
fa713d9e 1108{
d62a17ae 1109 mpls_lse_t out_lse[MPLS_MAX_LABELS];
a757997c 1110 int num_labels;
bd47f3a3 1111
d62a17ae 1112 /*
1113 * label_buf is *only* currently used within debugging.
1114 * As such when we assign it we are guarding it inside
1115 * a debug test. If you want to change this make sure
1116 * you fix this assumption
1117 */
1118 label_buf[0] = '\0';
d62a17ae 1119
a757997c
JU
1120 num_labels =
1121 build_label_stack(nh_label, out_lse, label_buf, label_buf_size);
fa712963
RW
1122
1123 if (num_labels) {
1124 /* Set the BoS bit */
1125 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1126
0be6e7d7 1127 if (rtmsg->rtm_family == AF_MPLS) {
a757997c 1128 if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse,
0be6e7d7
JU
1129 num_labels * sizeof(mpls_lse_t)))
1130 return false;
1131 } else {
fa712963 1132 struct rtattr *nest;
fa712963 1133
a757997c
JU
1134 if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
1135 LWTUNNEL_ENCAP_MPLS))
0be6e7d7
JU
1136 return false;
1137
a757997c 1138 nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
0be6e7d7
JU
1139 if (!nest)
1140 return false;
1141
a757997c 1142 if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST,
0be6e7d7
JU
1143 &out_lse,
1144 num_labels * sizeof(mpls_lse_t)))
1145 return false;
312a6bee 1146 nl_attr_nest_end(nlmsg, nest);
66d42727 1147 }
0aabccc0 1148 }
fa713d9e 1149
a757997c
JU
1150 return true;
1151}
1152
1153static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop,
1154 int family,
1155 struct nlmsghdr *nlmsg,
1156 size_t buflen, int bytelen)
1157{
1158 if (family == AF_INET) {
1159 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1160 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1161 &nexthop->rmap_src.ipv4, bytelen))
1162 return false;
1163 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1164 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1165 &nexthop->src.ipv4, bytelen))
1166 return false;
1167 }
1168 } else if (family == AF_INET6) {
1169 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1170 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1171 &nexthop->rmap_src.ipv6, bytelen))
1172 return false;
1173 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1174 if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1175 &nexthop->src.ipv6, bytelen))
1176 return false;
1177 }
1178 }
1179
1180 return true;
1181}
1182
1183/* This function takes a nexthop as argument and adds
1184 * the appropriate netlink attributes to an existing
1185 * netlink message.
1186 *
1187 * @param routedesc: Human readable description of route type
1188 * (direct/recursive, single-/multipath)
1189 * @param bytelen: Length of addresses in bytes.
1190 * @param nexthop: Nexthop information
1191 * @param nlmsg: nlmsghdr structure to fill in.
1192 * @param req_size: The size allocated for the message.
1193 *
1194 * The function returns true if the nexthop could be added
1195 * to the message, otherwise false is returned.
1196 */
1197static bool _netlink_route_build_singlepath(const struct prefix *p,
1198 const char *routedesc, int bytelen,
1199 const struct nexthop *nexthop,
1200 struct nlmsghdr *nlmsg,
1201 struct rtmsg *rtmsg,
1202 size_t req_size, int cmd)
1203{
1204
1205 char label_buf[256];
1206 struct vrf *vrf;
1207 char addrstr[INET6_ADDRSTRLEN];
1208
1209 assert(nexthop);
1210
1211 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1212
1213 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1214 req_size, rtmsg, label_buf,
1215 sizeof(label_buf)))
1216 return false;
1217
d62a17ae 1218 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1219 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1220
002e5c43 1221 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
d62a17ae 1222 rtmsg->rtm_flags |= RTNH_F_ONLINK;
0be6e7d7
JU
1223 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1224 return false;
1225 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1226 return false;
d62a17ae 1227
a757997c
JU
1228 if (cmd == RTM_NEWROUTE) {
1229 if (!_netlink_route_encode_nexthop_src(
1230 nexthop, AF_INET, nlmsg, req_size, bytelen))
0be6e7d7
JU
1231 return false;
1232 }
d62a17ae 1233
1234 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315
RZ
1235 zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1236 __func__, routedesc, p, ipv4_ll_buf,
1237 label_buf, nexthop->ifindex,
1238 VRF_LOGNAME(vrf), nexthop->vrf_id);
0be6e7d7 1239 return true;
0aabccc0
DD
1240 }
1241
d62a17ae 1242 if (nexthop->type == NEXTHOP_TYPE_IPV4
1243 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1244 /* Send deletes to the kernel without specifying the next-hop */
0be6e7d7
JU
1245 if (cmd != RTM_DELROUTE) {
1246 if (!_netlink_route_add_gateway_info(
1247 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1248 bytelen, nexthop))
1249 return false;
1250 }
d62a17ae 1251
1252 if (cmd == RTM_NEWROUTE) {
a757997c
JU
1253 if (!_netlink_route_encode_nexthop_src(
1254 nexthop, AF_INET, nlmsg, req_size, bytelen))
1255 return false;
d62a17ae 1256 }
1257
9266b315
RZ
1258 if (IS_ZEBRA_DEBUG_KERNEL) {
1259 inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr,
1260 sizeof(addrstr));
1261 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1262 __func__, routedesc, p, addrstr, label_buf,
1263 nexthop->ifindex, VRF_LOGNAME(vrf),
1264 nexthop->vrf_id);
1265 }
0aabccc0 1266 }
fa713d9e 1267
d62a17ae 1268 if (nexthop->type == NEXTHOP_TYPE_IPV6
1269 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
0be6e7d7
JU
1270 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1271 AF_INET6, nlmsg, req_size,
1272 bytelen, nexthop))
1273 return false;
d62a17ae 1274
1275 if (cmd == RTM_NEWROUTE) {
a757997c
JU
1276 if (!_netlink_route_encode_nexthop_src(
1277 nexthop, AF_INET6, nlmsg, req_size,
1278 bytelen))
1279 return false;
d62a17ae 1280 }
fa713d9e 1281
9266b315
RZ
1282 if (IS_ZEBRA_DEBUG_KERNEL) {
1283 inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr,
1284 sizeof(addrstr));
1285 zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1286 __func__, routedesc, p, addrstr, label_buf,
1287 nexthop->ifindex, VRF_LOGNAME(vrf),
1288 nexthop->vrf_id);
1289 }
d62a17ae 1290 }
5e210522
DS
1291
1292 /*
1293 * We have the ifindex so we should always send it
1294 * This is especially useful if we are doing route
1295 * leaking.
1296 */
0be6e7d7
JU
1297 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
1298 if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1299 return false;
1300 }
d62a17ae 1301
275565fb 1302 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
d62a17ae 1303 if (cmd == RTM_NEWROUTE) {
a757997c
JU
1304 if (!_netlink_route_encode_nexthop_src(
1305 nexthop, AF_INET, nlmsg, req_size, bytelen))
1306 return false;
d62a17ae 1307 }
fa713d9e 1308
d62a17ae 1309 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315
RZ
1310 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1311 __func__, routedesc, p, nexthop->ifindex,
1312 VRF_LOGNAME(vrf), nexthop->vrf_id);
0aabccc0 1313 }
0be6e7d7
JU
1314
1315 return true;
fa713d9e
CF
1316}
1317
1318/* This function takes a nexthop as argument and
312a6bee 1319 * appends to the given netlink msg. If the nexthop
fa713d9e
CF
1320 * defines a preferred source, the src parameter
1321 * will be modified to point to that src, otherwise
1322 * it will be kept unmodified.
1323 *
1324 * @param routedesc: Human readable description of route type
1325 * (direct/recursive, single-/multipath)
1326 * @param bytelen: Length of addresses in bytes.
1327 * @param nexthop: Nexthop information
312a6bee
JU
1328 * @param nlmsg: nlmsghdr structure to fill in.
1329 * @param req_size: The size allocated for the message.
fa713d9e
CF
1330 * @param src: pointer pointing to a location where
1331 * the prefsrc should be stored.
0be6e7d7
JU
1332 *
1333 * The function returns true if the nexthop could be added
1334 * to the message, otherwise false is returned.
fa713d9e 1335 */
0be6e7d7 1336static bool _netlink_route_build_multipath(const struct prefix *p,
312a6bee
JU
1337 const char *routedesc, int bytelen,
1338 const struct nexthop *nexthop,
1339 struct nlmsghdr *nlmsg,
1340 size_t req_size, struct rtmsg *rtmsg,
1341 const union g_addr **src)
fa713d9e 1342{
9a62e84b 1343 char label_buf[256];
bd47f3a3 1344 struct vrf *vrf;
312a6bee 1345 struct rtnexthop *rtnh;
d62a17ae 1346
312a6bee 1347 rtnh = nl_attr_rtnh(nlmsg, req_size);
0be6e7d7
JU
1348 if (rtnh == NULL)
1349 return false;
d62a17ae 1350
b7537db6
SW
1351 assert(nexthop);
1352
bd47f3a3
JU
1353 vrf = vrf_lookup_by_id(nexthop->vrf_id);
1354
a757997c
JU
1355 if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
1356 req_size, rtmsg, label_buf,
1357 sizeof(label_buf)))
1358 return false;
d62a17ae 1359
1360 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1361 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1362
002e5c43 1363 if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
d62a17ae 1364 rtnh->rtnh_flags |= RTNH_F_ONLINK;
a757997c 1365 if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
0be6e7d7 1366 return false;
d62a17ae 1367 rtnh->rtnh_ifindex = nexthop->ifindex;
8d27e1aa 1368 if (nexthop->weight)
1369 rtnh->rtnh_hops = nexthop->weight - 1;
d62a17ae 1370
975a328e 1371 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1372 *src = &nexthop->rmap_src;
975a328e 1373 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1374 *src = &nexthop->src;
1375
1376 if (IS_ZEBRA_DEBUG_KERNEL)
1377 zlog_debug(
9266b315
RZ
1378 "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1379 __func__, routedesc, p, ipv4_ll_buf, label_buf,
bd47f3a3
JU
1380 nexthop->ifindex, VRF_LOGNAME(vrf),
1381 nexthop->vrf_id);
312a6bee 1382 nl_attr_rtnh_end(nlmsg, rtnh);
0be6e7d7 1383 return true;
d62a17ae 1384 }
1385
1386 if (nexthop->type == NEXTHOP_TYPE_IPV4
1387 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
0be6e7d7
JU
1388 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET,
1389 nlmsg, req_size, bytelen,
1390 nexthop))
1391 return false;
1392
975a328e 1393 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1394 *src = &nexthop->rmap_src;
975a328e 1395 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1396 *src = &nexthop->src;
1397
a50404aa
RZ
1398 if (IS_ZEBRA_DEBUG_KERNEL)
1399 zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)",
1400 __func__, routedesc, p, &nexthop->gate.ipv4,
1401 label_buf, nexthop->ifindex,
1402 VRF_LOGNAME(vrf), nexthop->vrf_id);
d62a17ae 1403 }
1404 if (nexthop->type == NEXTHOP_TYPE_IPV6
1405 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
0be6e7d7
JU
1406 if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1407 AF_INET6, nlmsg, req_size,
1408 bytelen, nexthop))
1409 return false;
d62a17ae 1410
1411 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1412 *src = &nexthop->rmap_src;
1413 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1414 *src = &nexthop->src;
1415
a50404aa
RZ
1416 if (IS_ZEBRA_DEBUG_KERNEL)
1417 zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)",
1418 __func__, routedesc, p, &nexthop->gate.ipv6,
1419 label_buf, nexthop->ifindex,
1420 VRF_LOGNAME(vrf), nexthop->vrf_id);
d62a17ae 1421 }
5e210522
DS
1422
1423 /*
1424 * We have figured out the ifindex so we should always send it
1425 * This is especially useful if we are doing route
1426 * leaking.
1427 */
1428 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1429 rtnh->rtnh_ifindex = nexthop->ifindex;
1430
d62a17ae 1431 /* ifindex */
275565fb 1432 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
975a328e 1433 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1434 *src = &nexthop->rmap_src;
975a328e 1435 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1436 *src = &nexthop->src;
1437
1438 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315
RZ
1439 zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1440 __func__, routedesc, p, nexthop->ifindex,
1441 VRF_LOGNAME(vrf), nexthop->vrf_id);
d62a17ae 1442 }
df7fb580
DS
1443
1444 if (nexthop->weight)
1445 rtnh->rtnh_hops = nexthop->weight - 1;
0be6e7d7 1446
312a6bee 1447 nl_attr_rtnh_end(nlmsg, rtnh);
0be6e7d7 1448 return true;
fa713d9e
CF
1449}
1450
0be6e7d7 1451static inline bool _netlink_mpls_build_singlepath(const struct prefix *p,
9a0132a5 1452 const char *routedesc,
81793ac1 1453 const zebra_nhlfe_t *nhlfe,
d62a17ae 1454 struct nlmsghdr *nlmsg,
1455 struct rtmsg *rtmsg,
1456 size_t req_size, int cmd)
40c7bdb0 1457{
d62a17ae 1458 int bytelen;
d7c0a89a 1459 uint8_t family;
40c7bdb0 1460
d62a17ae 1461 family = NHLFE_FAMILY(nhlfe);
1462 bytelen = (family == AF_INET ? 4 : 16);
0be6e7d7
JU
1463 return _netlink_route_build_singlepath(p, routedesc, bytelen,
1464 nhlfe->nexthop, nlmsg, rtmsg,
1465 req_size, cmd);
40c7bdb0 1466}
1467
1468
0be6e7d7 1469static inline bool
9a0132a5 1470_netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc,
312a6bee
JU
1471 const zebra_nhlfe_t *nhlfe,
1472 struct nlmsghdr *nlmsg, size_t req_size,
1473 struct rtmsg *rtmsg, const union g_addr **src)
40c7bdb0 1474{
d62a17ae 1475 int bytelen;
d7c0a89a 1476 uint8_t family;
40c7bdb0 1477
d62a17ae 1478 family = NHLFE_FAMILY(nhlfe);
1479 bytelen = (family == AF_INET ? 4 : 16);
0be6e7d7
JU
1480 return _netlink_route_build_multipath(p, routedesc, bytelen,
1481 nhlfe->nexthop, nlmsg, req_size,
1482 rtmsg, src);
40c7bdb0 1483}
1484
d7c0a89a 1485static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
40c7bdb0 1486{
d62a17ae 1487 if (IS_ZEBRA_DEBUG_KERNEL)
0be6e7d7
JU
1488 zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20",
1489 routedesc, nl_msg_type_to_str(cmd), label);
fa713d9e
CF
1490}
1491
d62a17ae 1492static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
5895d33f 1493 int llalen, ns_id_t ns_id)
5c610faf 1494{
f3dbec60 1495 uint8_t protocol = RTPROT_ZEBRA;
d62a17ae 1496 struct {
1497 struct nlmsghdr n;
1498 struct ndmsg ndm;
1499 char buf[256];
1500 } req;
5c610faf 1501
5895d33f 1502 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
8f7d9fc0 1503
5605ecfc 1504 memset(&req, 0, sizeof(req));
5c610faf 1505
d62a17ae 1506 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1507 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1508 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1509 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
a55ba23f 1510
d62a17ae 1511 req.ndm.ndm_family = AF_INET;
1512 req.ndm.ndm_state = NUD_PERMANENT;
1513 req.ndm.ndm_ifindex = ifindex;
1514 req.ndm.ndm_type = RTN_UNICAST;
5c610faf 1515
312a6bee
JU
1516 nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol,
1517 sizeof(protocol));
a757997c 1518 nl_attr_put32(&req.n, sizeof(req), NDA_DST, addr);
312a6bee 1519 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
5c610faf 1520
d62a17ae 1521 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1522 0);
5c610faf
DS
1523}
1524
762288f5
SW
1525static bool nexthop_set_src(const struct nexthop *nexthop, int family,
1526 union g_addr *src)
1527{
1528 if (family == AF_INET) {
1529 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1530 src->ipv4 = nexthop->rmap_src.ipv4;
1531 return true;
1532 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1533 src->ipv4 = nexthop->src.ipv4;
1534 return true;
1535 }
1536 } else if (family == AF_INET6) {
1537 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1538 src->ipv6 = nexthop->rmap_src.ipv6;
1539 return true;
1540 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1541 src->ipv6 = nexthop->src.ipv6;
1542 return true;
1543 }
1544 }
1545
1546 return false;
1547}
1548
0be6e7d7
JU
1549/*
1550 * The function returns true if the attribute could be added
1551 * to the message, otherwise false is returned.
1552 */
1553static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen,
1554 struct nexthop *nh)
f2a0ba3a
RZ
1555{
1556 struct rtattr *nest;
1557
1558 switch (nh->nh_encap_type) {
1559 case NET_VXLAN:
a757997c 1560 if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type))
0be6e7d7 1561 return false;
f2a0ba3a 1562
312a6bee 1563 nest = nl_attr_nest(n, nlen, RTA_ENCAP);
0be6e7d7
JU
1564 if (!nest)
1565 return false;
1566
1567 if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */,
1568 nh->nh_encap.vni))
1569 return false;
312a6bee 1570 nl_attr_nest_end(n, nest);
f2a0ba3a
RZ
1571 break;
1572 }
0be6e7d7
JU
1573
1574 return true;
f2a0ba3a
RZ
1575}
1576
7cdb1a84
MS
1577/*
1578 * Routing table change via netlink interface, using a dataplane context object
0be6e7d7
JU
1579 *
1580 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
1581 * otherwise the number of bytes written to buf.
7cdb1a84 1582 */
0be6e7d7
JU
1583ssize_t netlink_route_multipath_msg_encode(int cmd,
1584 struct zebra_dplane_ctx *ctx,
1585 uint8_t *data, size_t datalen,
1586 bool fpm, bool force_nhg)
7cdb1a84
MS
1587{
1588 int bytelen;
7cdb1a84
MS
1589 struct nexthop *nexthop = NULL;
1590 unsigned int nexthop_num;
7cdb1a84 1591 const char *routedesc;
762288f5 1592 bool setsrc = false;
7cdb1a84
MS
1593 union g_addr src;
1594 const struct prefix *p, *src_p;
1595 uint32_t table_id;
1596
1597 struct {
1598 struct nlmsghdr n;
1599 struct rtmsg r;
e57a3fab
RZ
1600 char buf[];
1601 } *req = (void *)data;
7cdb1a84
MS
1602
1603 p = dplane_ctx_get_dest(ctx);
1604 src_p = dplane_ctx_get_src(ctx);
1605
0be6e7d7
JU
1606 if (datalen < sizeof(*req))
1607 return 0;
1608
e57a3fab 1609 memset(req, 0, sizeof(*req));
7cdb1a84 1610
b9c87515 1611 bytelen = (p->family == AF_INET ? 4 : 16);
7cdb1a84 1612
e57a3fab
RZ
1613 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1614 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
7cdb1a84 1615
334734a8
DS
1616 if ((cmd == RTM_NEWROUTE) &&
1617 ((p->family == AF_INET) || v6_rr_semantics))
e57a3fab 1618 req->n.nlmsg_flags |= NLM_F_REPLACE;
7cdb1a84 1619
e57a3fab 1620 req->n.nlmsg_type = cmd;
7cdb1a84 1621
e57a3fab 1622 req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
7cdb1a84 1623
b9c87515 1624 req->r.rtm_family = p->family;
e57a3fab
RZ
1625 req->r.rtm_dst_len = p->prefixlen;
1626 req->r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1627 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
7cdb1a84 1628
5709131c 1629 if (cmd == RTM_DELROUTE)
e57a3fab 1630 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
5709131c 1631 else
e57a3fab 1632 req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
7cdb1a84
MS
1633
1634 /*
1635 * blackhole routes are not RTN_UNICAST, they are
1636 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
1637 * so setting this value as a RTN_UNICAST would
1638 * cause the route lookup of just the prefix
1639 * to fail. So no need to specify this for
1640 * the RTM_DELROUTE case
1641 */
1642 if (cmd != RTM_DELROUTE)
e57a3fab 1643 req->r.rtm_type = RTN_UNICAST;
7cdb1a84 1644
0be6e7d7
JU
1645 if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen))
1646 return 0;
1647 if (src_p) {
1648 if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix,
1649 bytelen))
1650 return 0;
1651 }
7cdb1a84
MS
1652
1653 /* Metric. */
1654 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1655 * used
1656 * either by the kernel or by zebra. Its purely for calculating best
1657 * path(s)
1658 * by the routing protocol and for communicating with protocol peers.
1659 */
0be6e7d7
JU
1660 if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY,
1661 NL_DEFAULT_ROUTE_METRIC))
1662 return 0;
7cdb1a84
MS
1663
1664#if defined(SUPPORT_REALMS)
1665 {
1666 route_tag_t tag;
1667
5709131c 1668 if (cmd == RTM_DELROUTE)
7cdb1a84 1669 tag = dplane_ctx_get_old_tag(ctx);
5709131c 1670 else
7cdb1a84 1671 tag = dplane_ctx_get_tag(ctx);
7cdb1a84 1672
0be6e7d7
JU
1673 if (tag > 0 && tag <= 255) {
1674 if (!nl_attr_put32(&req->n, datalen, RTA_FLOW, tag))
1675 return 0;
1676 }
7cdb1a84
MS
1677 }
1678#endif
1679 /* Table corresponding to this route. */
1680 table_id = dplane_ctx_get_table(ctx);
1681 if (table_id < 256)
e57a3fab 1682 req->r.rtm_table = table_id;
7cdb1a84 1683 else {
e57a3fab 1684 req->r.rtm_table = RT_TABLE_UNSPEC;
0be6e7d7
JU
1685 if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id))
1686 return 0;
7cdb1a84
MS
1687 }
1688
9266b315
RZ
1689 if (IS_ZEBRA_DEBUG_KERNEL)
1690 zlog_debug(
1691 "%s: %s %pFX vrf %u(%u)", __func__,
1692 nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx),
1693 table_id);
7cdb1a84
MS
1694
1695 /*
1696 * If we are not updating the route and we have received
1697 * a route delete, then all we need to fill in is the
1698 * prefix information to tell the kernel to schwack
1699 * it.
1700 */
1701 if (cmd == RTM_DELROUTE)
0be6e7d7 1702 return NLMSG_ALIGN(req->n.nlmsg_len);
7cdb1a84
MS
1703
1704 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
312a6bee 1705 struct rtattr *nest;
7cdb1a84
MS
1706 uint32_t mtu = dplane_ctx_get_mtu(ctx);
1707 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
5709131c 1708
7cdb1a84
MS
1709 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
1710 mtu = nexthop_mtu;
312a6bee
JU
1711
1712 nest = nl_attr_nest(&req->n, datalen, RTA_METRICS);
0be6e7d7
JU
1713 if (nest == NULL)
1714 return 0;
1715
1716 if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu)))
1717 return 0;
312a6bee 1718 nl_attr_nest_end(&req->n, nest);
7cdb1a84
MS
1719 }
1720
013fef4c 1721 if ((!fpm && kernel_nexthops_supported()) || (fpm && force_nhg)) {
d8bfd8dc 1722 /* Kernel supports nexthop objects */
9a0132a5 1723 if (IS_ZEBRA_DEBUG_KERNEL)
0be6e7d7
JU
1724 zlog_debug("%s: %pFX nhg_id is %u", __func__, p,
1725 dplane_ctx_get_nhe_id(ctx));
e57a3fab 1726
0be6e7d7
JU
1727 if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID,
1728 dplane_ctx_get_nhe_id(ctx)))
1729 return 0;
d8bfd8dc
SW
1730
1731 /* Have to determine src still */
1732 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1733 if (setsrc)
1734 break;
1735
e57a3fab 1736 setsrc = nexthop_set_src(nexthop, p->family, &src);
d8bfd8dc
SW
1737 }
1738
1739 if (setsrc) {
0be6e7d7
JU
1740 if (p->family == AF_INET) {
1741 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1742 &src.ipv4, bytelen))
1743 return 0;
1744 } else if (p->family == AF_INET6) {
1745 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1746 &src.ipv6, bytelen))
1747 return 0;
1748 }
d8bfd8dc 1749 }
f78fe8f3 1750
0be6e7d7 1751 return NLMSG_ALIGN(req->n.nlmsg_len);
de3f5488
SW
1752 }
1753
7cdb1a84 1754 /* Count overall nexthops so we can decide whether to use singlepath
5709131c
MS
1755 * or multipath case.
1756 */
7cdb1a84
MS
1757 nexthop_num = 0;
1758 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1759 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1760 continue;
b9c87515 1761 if (!NEXTHOP_IS_ACTIVE(nexthop->flags))
7cdb1a84
MS
1762 continue;
1763
1764 nexthop_num++;
1765 }
1766
1767 /* Singlepath case. */
220f0f42 1768 if (nexthop_num == 1) {
7cdb1a84
MS
1769 nexthop_num = 0;
1770 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1771 /*
1772 * So we want to cover 2 types of blackhole
1773 * routes here:
1774 * 1) A normal blackhole route( ala from a static
1775 * install.
1776 * 2) A recursively resolved blackhole route
1777 */
1778 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1779 switch (nexthop->bh_type) {
1780 case BLACKHOLE_ADMINPROHIB:
e57a3fab 1781 req->r.rtm_type = RTN_PROHIBIT;
7cdb1a84
MS
1782 break;
1783 case BLACKHOLE_REJECT:
e57a3fab 1784 req->r.rtm_type = RTN_UNREACHABLE;
7cdb1a84
MS
1785 break;
1786 default:
e57a3fab 1787 req->r.rtm_type = RTN_BLACKHOLE;
7cdb1a84
MS
1788 break;
1789 }
0be6e7d7 1790 return NLMSG_ALIGN(req->n.nlmsg_len);
7cdb1a84
MS
1791 }
1792 if (CHECK_FLAG(nexthop->flags,
1793 NEXTHOP_FLAG_RECURSIVE)) {
5709131c
MS
1794
1795 if (setsrc)
1796 continue;
1797
b9c87515
RZ
1798 setsrc = nexthop_set_src(nexthop, p->family,
1799 &src);
f183e380 1800 continue;
7cdb1a84
MS
1801 }
1802
b9c87515 1803 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
7cdb1a84
MS
1804 routedesc = nexthop->rparent
1805 ? "recursive, single-path"
1806 : "single-path";
1807
0be6e7d7
JU
1808 if (!_netlink_route_build_singlepath(
1809 p, routedesc, bytelen, nexthop,
1810 &req->n, &req->r, datalen, cmd))
1811 return 0;
7cdb1a84
MS
1812 nexthop_num++;
1813 break;
1814 }
f2a0ba3a
RZ
1815
1816 /*
1817 * Add encapsulation information when installing via
1818 * FPM.
1819 */
0be6e7d7
JU
1820 if (fpm) {
1821 if (!netlink_route_nexthop_encap(
1822 &req->n, datalen, nexthop))
1823 return 0;
1824 }
7cdb1a84 1825 }
f2a0ba3a 1826
13e0321a 1827 if (setsrc) {
0be6e7d7
JU
1828 if (p->family == AF_INET) {
1829 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1830 &src.ipv4, bytelen))
1831 return 0;
1832 } else if (p->family == AF_INET6) {
1833 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1834 &src.ipv6, bytelen))
1835 return 0;
1836 }
7cdb1a84
MS
1837 }
1838 } else { /* Multipath case */
312a6bee 1839 struct rtattr *nest;
81793ac1 1840 const union g_addr *src1 = NULL;
7cdb1a84 1841
312a6bee 1842 nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH);
0be6e7d7
JU
1843 if (nest == NULL)
1844 return 0;
7cdb1a84
MS
1845
1846 nexthop_num = 0;
1847 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
7cdb1a84
MS
1848 if (CHECK_FLAG(nexthop->flags,
1849 NEXTHOP_FLAG_RECURSIVE)) {
1850 /* This only works for IPv4 now */
5709131c
MS
1851 if (setsrc)
1852 continue;
1853
b9c87515
RZ
1854 setsrc = nexthop_set_src(nexthop, p->family,
1855 &src);
78e54ded 1856 continue;
7cdb1a84
MS
1857 }
1858
b9c87515 1859 if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
7cdb1a84
MS
1860 routedesc = nexthop->rparent
1861 ? "recursive, multipath"
1862 : "multipath";
1863 nexthop_num++;
1864
0be6e7d7
JU
1865 if (!_netlink_route_build_multipath(
1866 p, routedesc, bytelen, nexthop,
1867 &req->n, datalen, &req->r, &src1))
1868 return 0;
7cdb1a84
MS
1869
1870 if (!setsrc && src1) {
b9c87515 1871 if (p->family == AF_INET)
7cdb1a84 1872 src.ipv4 = src1->ipv4;
b9c87515 1873 else if (p->family == AF_INET6)
7cdb1a84
MS
1874 src.ipv6 = src1->ipv6;
1875
1876 setsrc = 1;
1877 }
1878 }
312a6bee 1879 }
0be6e7d7 1880
312a6bee
JU
1881 nl_attr_nest_end(&req->n, nest);
1882
1883 /*
1884 * Add encapsulation information when installing via
1885 * FPM.
1886 */
1887 if (fpm) {
1888 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
1889 nexthop)) {
1890 if (CHECK_FLAG(nexthop->flags,
1891 NEXTHOP_FLAG_RECURSIVE))
1892 continue;
0be6e7d7
JU
1893 if (!netlink_route_nexthop_encap(
1894 &req->n, datalen, nexthop))
1895 return 0;
312a6bee 1896 }
7cdb1a84 1897 }
f2a0ba3a 1898
312a6bee 1899
13e0321a 1900 if (setsrc) {
0be6e7d7
JU
1901 if (p->family == AF_INET) {
1902 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1903 &src.ipv4, bytelen))
1904 return 0;
1905 } else if (p->family == AF_INET6) {
1906 if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
1907 &src.ipv6, bytelen))
1908 return 0;
1909 }
7cdb1a84
MS
1910 if (IS_ZEBRA_DEBUG_KERNEL)
1911 zlog_debug("Setting source");
1912 }
7cdb1a84
MS
1913 }
1914
1915 /* If there is no useful nexthop then return. */
1916 if (nexthop_num == 0) {
1917 if (IS_ZEBRA_DEBUG_KERNEL)
9266b315 1918 zlog_debug("%s: No useful nexthop.", __func__);
7cdb1a84
MS
1919 }
1920
312a6bee 1921 return NLMSG_ALIGN(req->n.nlmsg_len);
7cdb1a84
MS
1922}
1923
43b5cc5e 1924int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
e3be0432 1925{
5523c156 1926 uint32_t actual_table;
d62a17ae 1927 int suc = 0;
1928 struct mcast_route_data *mr = (struct mcast_route_data *)in;
bd8b9272
DS
1929 struct {
1930 struct nlmsghdr n;
1931 struct ndmsg ndm;
1932 char buf[256];
1933 } req;
e3be0432 1934
d62a17ae 1935 mroute = mr;
5895d33f 1936 struct zebra_ns *zns;
bd8b9272 1937
009f8ad5 1938 zns = zvrf->zns;
5605ecfc 1939 memset(&req, 0, sizeof(req));
bd8b9272
DS
1940
1941 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1942 req.n.nlmsg_flags = NLM_F_REQUEST;
1943 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1944
1945 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1946 req.n.nlmsg_type = RTM_GETROUTE;
1947
a757997c
JU
1948 nl_attr_put32(&req.n, sizeof(req), RTA_IIF, mroute->ifindex);
1949 nl_attr_put32(&req.n, sizeof(req), RTA_OIF, mroute->ifindex);
1950 nl_attr_put32(&req.n, sizeof(req), RTA_SRC, mroute->sg.src.s_addr);
1951 nl_attr_put32(&req.n, sizeof(req), RTA_DST, mroute->sg.grp.s_addr);
5523c156
DS
1952 /*
1953 * What?
1954 *
1955 * So during the namespace cleanup we started storing
1956 * the zvrf table_id for the default table as RT_TABLE_MAIN
1957 * which is what the normal routing table for ip routing is.
1958 * This change caused this to break our lookups of sg data
1959 * because prior to this change the zvrf->table_id was 0
1960 * and when the pim multicast kernel code saw a 0,
1961 * it was auto-translated to RT_TABLE_DEFAULT. But since
1962 * we are now passing in RT_TABLE_MAIN there is no auto-translation
1963 * and the kernel goes screw you and the delicious cookies you
1964 * are trying to give me. So now we have this little hack.
1965 */
1966 actual_table = (zvrf->table_id == RT_TABLE_MAIN) ? RT_TABLE_DEFAULT :
1967 zvrf->table_id;
a757997c 1968 nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table);
e3be0432 1969
bd8b9272
DS
1970 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1971 &zns->netlink_cmd, zns, 0);
e3be0432 1972
bd8b9272 1973 mroute = NULL;
d62a17ae 1974 return suc;
e3be0432
DS
1975}
1976
8d03bc50
SW
1977/* Char length to debug ID with */
1978#define ID_LENGTH 10
1979
0be6e7d7 1980static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
8d03bc50 1981 uint32_t id,
e22e8001 1982 const struct nh_grp *z_grp,
0c8215cb 1983 const uint8_t count)
565ce0d3 1984{
565ce0d3 1985 struct nexthop_grp grp[count];
8d03bc50
SW
1986 /* Need space for max group size, "/", and null term */
1987 char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
1988 char buf1[ID_LENGTH + 2];
1989
1990 buf[0] = '\0';
565ce0d3
SW
1991
1992 memset(grp, 0, sizeof(grp));
1993
1994 if (count) {
0c8215cb 1995 for (int i = 0; i < count; i++) {
e22e8001 1996 grp[i].id = z_grp[i].id;
df7fb580 1997 grp[i].weight = z_grp[i].weight - 1;
8d03bc50
SW
1998
1999 if (IS_ZEBRA_DEBUG_KERNEL) {
2000 if (i == 0)
2001 snprintf(buf, sizeof(buf1), "group %u",
2002 grp[i].id);
2003 else {
2004 snprintf(buf1, sizeof(buf1), "/%u",
2005 grp[i].id);
2006 strlcat(buf, buf1, sizeof(buf));
2007 }
2008 }
565ce0d3 2009 }
0be6e7d7
JU
2010 if (!nl_attr_put(n, req_size, NHA_GROUP, grp,
2011 count * sizeof(*grp)))
2012 return false;
565ce0d3 2013 }
8d03bc50
SW
2014
2015 if (IS_ZEBRA_DEBUG_KERNEL)
2016 zlog_debug("%s: ID (%u): %s", __func__, id, buf);
0be6e7d7
JU
2017
2018 return true;
565ce0d3
SW
2019}
2020
f820d025 2021/**
e9a1cd93 2022 * Next hop packet encoding helper function.
f820d025 2023 *
e9a1cd93
RZ
2024 * \param[in] cmd netlink command.
2025 * \param[in] ctx dataplane context (information snapshot).
2026 * \param[out] buf buffer to hold the packet.
2027 * \param[in] buflen amount of buffer bytes.
f820d025 2028 *
0be6e7d7
JU
2029 * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
2030 * otherwise the number of bytes written to buf.
f820d025 2031 */
0be6e7d7
JU
2032ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
2033 const struct zebra_dplane_ctx *ctx,
2034 void *buf, size_t buflen)
f820d025 2035{
f820d025
SW
2036 struct {
2037 struct nlmsghdr n;
2038 struct nhmsg nhm;
e9a1cd93
RZ
2039 char buf[];
2040 } *req = buf;
f820d025 2041
8d03bc50
SW
2042 mpls_lse_t out_lse[MPLS_MAX_LABELS];
2043 char label_buf[256];
2044 int num_labels = 0;
81505946 2045
8d03bc50
SW
2046 label_buf[0] = '\0';
2047
0be6e7d7
JU
2048 if (buflen < sizeof(*req))
2049 return 0;
2050
2051 memset(req, 0, sizeof(*req));
f820d025 2052
e9a1cd93
RZ
2053 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2054 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
9a1588c4
SW
2055
2056 if (cmd == RTM_NEWNEXTHOP)
e9a1cd93 2057 req->n.nlmsg_flags |= NLM_F_REPLACE;
9a1588c4 2058
e9a1cd93
RZ
2059 req->n.nlmsg_type = cmd;
2060 req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
f820d025 2061
e9a1cd93 2062 req->nhm.nh_family = AF_UNSPEC;
fec211ad 2063 /* TODO: Scope? */
f820d025 2064
0c8215cb
SW
2065 uint32_t id = dplane_ctx_get_nhe_id(ctx);
2066
2067 if (!id) {
f820d025
SW
2068 flog_err(
2069 EC_ZEBRA_NHG_FIB_UPDATE,
2070 "Failed trying to update a nexthop group in the kernel that does not have an ID");
2071 return -1;
2072 }
2073
0be6e7d7
JU
2074 if (!nl_attr_put32(&req->n, buflen, NHA_ID, id))
2075 return 0;
f820d025
SW
2076
2077 if (cmd == RTM_NEWNEXTHOP) {
bf1626a6
MS
2078 /*
2079 * We distinguish between a "group", which is a collection
2080 * of ids, and a singleton nexthop with an id. The
2081 * group is installed as an id that just refers to a list of
2082 * other ids.
2083 */
0be6e7d7 2084 if (dplane_ctx_get_nhe_nh_grp_count(ctx)) {
d52c949b 2085 if (!_netlink_nexthop_build_group(
0be6e7d7
JU
2086 &req->n, buflen, id,
2087 dplane_ctx_get_nhe_nh_grp(ctx),
2088 dplane_ctx_get_nhe_nh_grp_count(ctx)))
2089 return 0;
2090 } else {
0c8215cb
SW
2091 const struct nexthop *nh =
2092 dplane_ctx_get_nhe_ng(ctx)->nexthop;
2093 afi_t afi = dplane_ctx_get_nhe_afi(ctx);
e8b0e420 2094
0c8215cb 2095 if (afi == AFI_IP)
e9a1cd93 2096 req->nhm.nh_family = AF_INET;
0c8215cb 2097 else if (afi == AFI_IP6)
e9a1cd93 2098 req->nhm.nh_family = AF_INET6;
f820d025 2099
565ce0d3 2100 switch (nh->type) {
a6e6a6d8 2101 case NEXTHOP_TYPE_IPV4:
565ce0d3 2102 case NEXTHOP_TYPE_IPV4_IFINDEX:
0be6e7d7
JU
2103 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2104 &nh->gate.ipv4,
2105 IPV4_MAX_BYTELEN))
2106 return 0;
565ce0d3 2107 break;
a6e6a6d8 2108 case NEXTHOP_TYPE_IPV6:
565ce0d3 2109 case NEXTHOP_TYPE_IPV6_IFINDEX:
0be6e7d7
JU
2110 if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2111 &nh->gate.ipv6,
2112 IPV6_MAX_BYTELEN))
2113 return 0;
565ce0d3
SW
2114 break;
2115 case NEXTHOP_TYPE_BLACKHOLE:
0be6e7d7
JU
2116 if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE,
2117 NULL, 0))
2118 return 0;
8d03bc50
SW
2119 /* Blackhole shouldn't have anymore attributes
2120 */
2121 goto nexthop_done;
565ce0d3
SW
2122 case NEXTHOP_TYPE_IFINDEX:
2123 /* Don't need anymore info for this */
2124 break;
a6e6a6d8
SW
2125 }
2126
2127 if (!nh->ifindex) {
565ce0d3
SW
2128 flog_err(
2129 EC_ZEBRA_NHG_FIB_UPDATE,
2130 "Context received for kernel nexthop update without an interface");
2131 return -1;
565ce0d3
SW
2132 }
2133
0be6e7d7
JU
2134 if (!nl_attr_put32(&req->n, buflen, NHA_OIF,
2135 nh->ifindex))
2136 return 0;
8d03bc50 2137
62d2ecb2 2138 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK))
e9a1cd93 2139 req->nhm.nh_flags |= RTNH_F_ONLINK;
62d2ecb2 2140
8d03bc50
SW
2141 num_labels =
2142 build_label_stack(nh->nh_label, out_lse,
2143 label_buf, sizeof(label_buf));
2144
2145 if (num_labels) {
2146 /* Set the BoS bit */
2147 out_lse[num_labels - 1] |=
2148 htonl(1 << MPLS_LS_S_SHIFT);
2149
2150 /*
2151 * TODO: MPLS unsupported for now in kernel.
2152 */
e9a1cd93 2153 if (req->nhm.nh_family == AF_MPLS)
8d03bc50
SW
2154 goto nexthop_done;
2155#if 0
0be6e7d7 2156 if (!nl_attr_put(&req->n, buflen, NHA_NEWDST,
8d03bc50
SW
2157 &out_lse,
2158 num_labels
0be6e7d7
JU
2159 * sizeof(mpls_lse_t)))
2160 return 0;
8d03bc50
SW
2161#endif
2162 else {
2163 struct rtattr *nest;
2164 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
2165
a757997c
JU
2166 if (!nl_attr_put16(&req->n, buflen,
2167 NHA_ENCAP_TYPE,
2168 encap))
0be6e7d7 2169 return 0;
312a6bee 2170 nest = nl_attr_nest(&req->n, buflen,
8d03bc50 2171 NHA_ENCAP);
0be6e7d7
JU
2172 if (!nest)
2173 return 0;
2174 if (!nl_attr_put(
2175 &req->n, buflen,
2176 MPLS_IPTUNNEL_DST, &out_lse,
2177 num_labels
2178 * sizeof(
2179 mpls_lse_t)))
2180 return 0;
312a6bee 2181 nl_attr_nest_end(&req->n, nest);
8d03bc50
SW
2182 }
2183 }
2184
bf1626a6
MS
2185nexthop_done:
2186
2187 if (IS_ZEBRA_DEBUG_KERNEL)
2c77ddee
DS
2188 zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ",
2189 __func__, id, nh, nh->ifindex,
bd47f3a3
JU
2190 vrf_id_to_name(nh->vrf_id),
2191 nh->vrf_id, label_buf);
0be6e7d7 2192}
f820d025 2193
e9a1cd93
RZ
2194 req->nhm.nh_protocol =
2195 zebra2proto(dplane_ctx_get_nhe_type(ctx));
f820d025 2196
f820d025
SW
2197 } else if (cmd != RTM_DELNEXTHOP) {
2198 flog_err(
2199 EC_ZEBRA_NHG_FIB_UPDATE,
2200 "Nexthop group kernel update command (%d) does not exist",
2201 cmd);
2202 return -1;
2203 }
2204
9266b315
RZ
2205 if (IS_ZEBRA_DEBUG_KERNEL)
2206 zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd),
2207 id);
f820d025 2208
e9a1cd93 2209 return NLMSG_ALIGN(req->n.nlmsg_len);
f820d025
SW
2210}
2211
2212/**
2213 * kernel_nexthop_update() - Update/delete a nexthop from the kernel
2214 *
2215 * @ctx: Dataplane context
2216 *
2217 * Return: Dataplane result flag
2218 */
2219enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx)
2220{
bf1626a6 2221 enum dplane_op_e op;
98cda54a
SW
2222 int cmd = 0;
2223 int ret = 0;
e9a1cd93 2224 char buf[NL_PKT_BUF_SIZE];
f820d025 2225
bf1626a6
MS
2226 op = dplane_ctx_get_op(ctx);
2227 if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE)
f820d025 2228 cmd = RTM_NEWNEXTHOP;
bf1626a6
MS
2229 else if (op == DPLANE_OP_NH_DELETE)
2230 cmd = RTM_DELNEXTHOP;
2231 else {
2232 flog_err(EC_ZEBRA_NHG_FIB_UPDATE,
2233 "Context received for kernel nexthop update with incorrect OP code (%u)",
2234 op);
f820d025 2235 return ZEBRA_DPLANE_REQUEST_FAILURE;
f820d025
SW
2236 }
2237
e9a1cd93
RZ
2238 /* Nothing to do if the kernel doesn't support nexthop objects */
2239 if (!kernel_nexthops_supported())
2240 return ZEBRA_DPLANE_REQUEST_SUCCESS;
2241
0be6e7d7 2242 if (netlink_nexthop_msg_encode(cmd, ctx, buf, sizeof(buf)) > 0)
e9a1cd93
RZ
2243 ret = netlink_talk_info(netlink_talk_filter, (void *)&buf,
2244 dplane_ctx_get_ns(ctx), 0);
2245 else
2246 ret = 0;
f820d025
SW
2247
2248 return (ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS
2249 : ZEBRA_DPLANE_REQUEST_FAILURE);
2250}
2251
7cdb1a84
MS
2252/*
2253 * Update or delete a prefix from the kernel,
2254 * using info from a dataplane context.
2255 */
25779064 2256enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
7cdb1a84
MS
2257{
2258 int cmd, ret;
2259 const struct prefix *p = dplane_ctx_get_dest(ctx);
f183e380 2260 struct nexthop *nexthop;
e57a3fab 2261 uint8_t nl_pkt[NL_PKT_BUF_SIZE];
7cdb1a84
MS
2262
2263 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
2264 cmd = RTM_DELROUTE;
2265 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
2266 cmd = RTM_NEWROUTE;
2267 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
2268
2269 if (p->family == AF_INET || v6_rr_semantics) {
2270 /* Single 'replace' operation */
2271 cmd = RTM_NEWROUTE;
fe5f21af
DS
2272
2273 /*
2274 * With route replace semantics in place
2275 * for v4 routes and the new route is a system
2276 * route we do not install anything.
2277 * The problem here is that the new system
2278 * route should cause us to withdraw from
2279 * the kernel the old non-system route
2280 */
2281 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)) &&
e57a3fab 2282 !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) {
0be6e7d7
JU
2283 if (netlink_route_multipath_msg_encode(
2284 RTM_DELROUTE, ctx, nl_pkt,
2285 sizeof(nl_pkt), false, false)
2286 > 0)
2287 netlink_talk_info(
2288 netlink_talk_filter,
2289 (struct nlmsghdr *)nl_pkt,
2290 dplane_ctx_get_ns(ctx), 0);
e57a3fab 2291 }
7cdb1a84
MS
2292 } else {
2293 /*
2294 * So v6 route replace semantics are not in
2295 * the kernel at this point as I understand it.
2296 * so let's do a delete then an add.
2297 * In the future once v6 route replace semantics
2298 * are in we can figure out what to do here to
2299 * allow working with old and new kernels.
2300 *
2301 * I'm also intentionally ignoring the failure case
2302 * of the route delete. If that happens yeah we're
2303 * screwed.
2304 */
e57a3fab 2305 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) {
0be6e7d7
JU
2306 if (netlink_route_multipath_msg_encode(
2307 RTM_DELROUTE, ctx, nl_pkt,
2308 sizeof(nl_pkt), false, false)
2309 > 0)
2310 netlink_talk_info(
2311 netlink_talk_filter,
2312 (struct nlmsghdr *)nl_pkt,
2313 dplane_ctx_get_ns(ctx), 0);
e57a3fab 2314 }
7cdb1a84
MS
2315 cmd = RTM_NEWROUTE;
2316 }
2317
2318 } else {
2319 return ZEBRA_DPLANE_REQUEST_FAILURE;
2320 }
2321
e57a3fab 2322 if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))) {
0be6e7d7
JU
2323 if (netlink_route_multipath_msg_encode(
2324 cmd, ctx, nl_pkt, sizeof(nl_pkt), false, false)
2325 > 0)
2326 ret = netlink_talk_info(netlink_talk_filter,
2327 (struct nlmsghdr *)nl_pkt,
2328 dplane_ctx_get_ns(ctx), 0);
2329 else
2330 ret = -1;
2331
e57a3fab 2332 } else
3cdba47a 2333 ret = 0;
f183e380
MS
2334 if ((cmd == RTM_NEWROUTE) && (ret == 0)) {
2335 /* Update installed nexthops to signal which have been
2336 * installed.
2337 */
2338 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2339 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2340 continue;
2341
2342 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
2343 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
f183e380
MS
2344 }
2345 }
2346 }
7cdb1a84
MS
2347
2348 return (ret == 0 ?
2349 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
2350}
2351
d9f5b2f5
SW
2352/**
2353 * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop
2354 *
2355 * @tb: Netlink RTA data
2356 * @family: Address family in the nhmsg
8c0a24c1 2357 * @ifp: Interface connected - this should be NULL, we fill it in
d9f5b2f5
SW
2358 * @ns_id: Namspace id
2359 *
2360 * Return: New nexthop
2361 */
e22e8001
SW
2362static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
2363 unsigned char family,
2364 struct interface **ifp,
2365 ns_id_t ns_id)
d9f5b2f5 2366{
e22e8001 2367 struct nexthop nh = {};
d9f5b2f5 2368 void *gate = NULL;
8e401b25 2369 enum nexthop_types_t type = 0;
e22e8001
SW
2370 int if_index = 0;
2371 size_t sz = 0;
7134ba70 2372 struct interface *ifp_lookup;
d9f5b2f5
SW
2373
2374 if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
2375
8e401b25 2376
d9f5b2f5
SW
2377 if (tb[NHA_GATEWAY]) {
2378 switch (family) {
2379 case AF_INET:
8e401b25 2380 type = NEXTHOP_TYPE_IPV4_IFINDEX;
d9f5b2f5
SW
2381 sz = 4;
2382 break;
2383 case AF_INET6:
8e401b25 2384 type = NEXTHOP_TYPE_IPV6_IFINDEX;
d9f5b2f5
SW
2385 sz = 16;
2386 break;
2387 default:
2388 flog_warn(
2389 EC_ZEBRA_BAD_NHG_MESSAGE,
c4239c05 2390 "Nexthop gateway with bad address family (%d) received from kernel",
d9f5b2f5 2391 family);
e22e8001 2392 return nh;
d9f5b2f5
SW
2393 }
2394 gate = RTA_DATA(tb[NHA_GATEWAY]);
e22e8001 2395 } else
8e401b25 2396 type = NEXTHOP_TYPE_IFINDEX;
d9f5b2f5 2397
8e401b25 2398 if (type)
e22e8001 2399 nh.type = type;
8e401b25
SW
2400
2401 if (gate)
e22e8001 2402 memcpy(&(nh.gate), gate, sz);
8e401b25
SW
2403
2404 if (if_index)
e22e8001 2405 nh.ifindex = if_index;
8e401b25 2406
7134ba70
DS
2407 ifp_lookup =
2408 if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
2409
e22e8001 2410 if (ifp)
7134ba70
DS
2411 *ifp = ifp_lookup;
2412 if (ifp_lookup)
2413 nh.vrf_id = ifp_lookup->vrf_id;
e22e8001 2414 else {
d9f5b2f5
SW
2415 flog_warn(
2416 EC_ZEBRA_UNKNOWN_INTERFACE,
2417 "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
15569c58 2418 __func__, nh.ifindex);
d9f5b2f5 2419
e22e8001 2420 nh.vrf_id = VRF_DEFAULT;
d9f5b2f5
SW
2421 }
2422
2423 if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
2424 uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
2425 int num_labels = 0;
6e728764 2426
d9f5b2f5
SW
2427 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
2428
e22e8001 2429 if (encap_type == LWTUNNEL_ENCAP_MPLS)
d9f5b2f5 2430 num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
d9f5b2f5 2431
e22e8001
SW
2432 if (num_labels)
2433 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
d9f5b2f5 2434 labels);
d9f5b2f5
SW
2435 }
2436
2437 return nh;
2438}
2439
85f5e761 2440static int netlink_nexthop_process_group(struct rtattr **tb,
5a935f79 2441 struct nh_grp *z_grp, int z_grp_size)
d9f5b2f5 2442{
e22e8001
SW
2443 uint8_t count = 0;
2444 /* linux/nexthop.h group struct */
d9f5b2f5
SW
2445 struct nexthop_grp *n_grp = NULL;
2446
85f5e761 2447 n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
d9f5b2f5
SW
2448 count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
2449
2450 if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
2451 flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
2452 "Invalid nexthop group received from the kernel");
85f5e761 2453 return count;
d9f5b2f5
SW
2454 }
2455
38e40db1 2456#if 0
d9f5b2f5 2457 // TODO: Need type for something?
85f5e761
SW
2458 zlog_debug("Nexthop group type: %d",
2459 *((uint16_t *)RTA_DATA(tb[NHA_GROUP_TYPE])));
d9f5b2f5 2460
38e40db1 2461#endif
d9f5b2f5 2462
5a935f79 2463 for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
e22e8001 2464 z_grp[i].id = n_grp[i].id;
df7fb580 2465 z_grp[i].weight = n_grp[i].weight + 1;
85f5e761 2466 }
d9f5b2f5
SW
2467 return count;
2468}
2469
2470/**
2471 * netlink_nexthop_change() - Read in change about nexthops from the kernel
2472 *
2473 * @h: Netlink message header
2474 * @ns_id: Namspace id
2475 * @startup: Are we reading under startup conditions?
2476 *
2477 * Return: Result status
2478 */
2479int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2480{
2481 int len;
2482 /* nexthop group id */
2483 uint32_t id;
2484 unsigned char family;
38e40db1 2485 int type;
e8b0e420 2486 afi_t afi = AFI_UNSPEC;
946de1b9 2487 vrf_id_t vrf_id = VRF_DEFAULT;
8c0a24c1 2488 struct interface *ifp = NULL;
d9f5b2f5 2489 struct nhmsg *nhm = NULL;
e22e8001
SW
2490 struct nexthop nh = {};
2491 struct nh_grp grp[MULTIPATH_NUM] = {};
85f5e761 2492 /* Count of nexthops in group array */
e22e8001 2493 uint8_t grp_count = 0;
e22e8001 2494 struct rtattr *tb[NHA_MAX + 1] = {};
d9f5b2f5 2495
d9f5b2f5
SW
2496 nhm = NLMSG_DATA(h);
2497
88cafda7
DS
2498 if (ns_id)
2499 vrf_id = ns_id;
2500
d9f5b2f5
SW
2501 if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
2502 return 0;
2503
2504 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
2505 if (len < 0) {
2506 zlog_warn(
2507 "%s: Message received from netlink is of a broken size %d %zu",
15569c58 2508 __func__, h->nlmsg_len,
d9f5b2f5
SW
2509 (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
2510 return -1;
2511 }
2512
d9f5b2f5
SW
2513 netlink_parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len);
2514
2515
2516 if (!tb[NHA_ID]) {
2517 flog_warn(
2518 EC_ZEBRA_BAD_NHG_MESSAGE,
2519 "Nexthop group without an ID received from the kernel");
2520 return -1;
2521 }
2522
2523 /* We use the ID key'd nhg table for kernel updates */
2524 id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
d9f5b2f5 2525
506efd37
AK
2526 if (zebra_evpn_mh_is_fdb_nh(id)) {
2527 /* If this is a L2 NH just ignore it */
2528 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
2529 zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x",
2530 h->nlmsg_type, id);
2531 }
2532 return 0;
2533 }
2534
e8b0e420 2535 family = nhm->nh_family;
e8b0e420
SW
2536 afi = family2afi(family);
2537
38e40db1
SW
2538 type = proto2zebra(nhm->nh_protocol, 0, true);
2539
fdee485a
SW
2540 if (IS_ZEBRA_DEBUG_KERNEL)
2541 zlog_debug("%s ID (%u) %s NS %u",
2542 nl_msg_type_to_str(h->nlmsg_type), id,
2543 nl_family_to_str(family), ns_id);
2544
2545
d9f5b2f5
SW
2546 if (h->nlmsg_type == RTM_NEWNEXTHOP) {
2547 if (tb[NHA_GROUP]) {
2548 /**
2549 * If this is a group message its only going to have
2550 * an array of nexthop IDs associated with it
2551 */
5a935f79
SW
2552 grp_count = netlink_nexthop_process_group(
2553 tb, grp, array_size(grp));
85f5e761
SW
2554 } else {
2555 if (tb[NHA_BLACKHOLE]) {
2556 /**
2557 * This nexthop is just for blackhole-ing
2558 * traffic, it should not have an OIF, GATEWAY,
2559 * or ENCAP
2560 */
e22e8001
SW
2561 nh.type = NEXTHOP_TYPE_BLACKHOLE;
2562 nh.bh_type = BLACKHOLE_UNSPEC;
2563 } else if (tb[NHA_OIF])
85f5e761
SW
2564 /**
2565 * This is a true new nexthop, so we need
2566 * to parse the gateway and device info
2567 */
2568 nh = netlink_nexthop_process_nh(tb, family,
2569 &ifp, ns_id);
e22e8001
SW
2570 else {
2571
8e401b25
SW
2572 flog_warn(
2573 EC_ZEBRA_BAD_NHG_MESSAGE,
2574 "Invalid Nexthop message received from the kernel with ID (%u)",
2575 id);
2576 return -1;
2577 }
e22e8001
SW
2578 SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
2579 if (nhm->nh_flags & RTNH_F_ONLINK)
2580 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
2581 vrf_id = nh.vrf_id;
d9f5b2f5
SW
2582 }
2583
38e40db1
SW
2584 if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
2585 type, startup))
e22e8001 2586 return -1;
8e401b25 2587
9a1588c4 2588 } else if (h->nlmsg_type == RTM_DELNEXTHOP)
88cafda7 2589 zebra_nhg_kernel_del(id, vrf_id);
d9f5b2f5 2590
d9f5b2f5
SW
2591 return 0;
2592}
2593
2594/**
2595 * netlink_request_nexthop() - Request nextop information from the kernel
2596 * @zns: Zebra namespace
2597 * @family: AF_* netlink family
2598 * @type: RTM_* route type
2599 *
2600 * Return: Result status
2601 */
2602static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
2603{
2604 struct {
2605 struct nlmsghdr n;
2606 struct nhmsg nhm;
2607 } req;
2608
2609 /* Form the request, specifying filter (rtattr) if needed. */
2610 memset(&req, 0, sizeof(req));
2611 req.n.nlmsg_type = type;
2612 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
2613 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2614 req.nhm.nh_family = family;
2615
fd3f8e52 2616 return netlink_request(&zns->netlink_cmd, &req);
d9f5b2f5
SW
2617}
2618
7d5bb02b 2619
d9f5b2f5
SW
2620/**
2621 * netlink_nexthop_read() - Nexthop read function using netlink interface
2622 *
2623 * @zns: Zebra name space
2624 *
2625 * Return: Result status
2626 * Only called at bootstrap time.
2627 */
2628int netlink_nexthop_read(struct zebra_ns *zns)
2629{
2630 int ret;
2631 struct zebra_dplane_info dp_info;
2632
2633 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2634
2635 /* Get nexthop objects */
2636 ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
2637 if (ret < 0)
2638 return ret;
2639 ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
2640 &dp_info, 0, 1);
81505946
SW
2641
2642 if (!ret)
2643 /* If we succesfully read in nexthop objects,
2644 * this kernel must support them.
2645 */
2646 supports_nh = true;
7c99d51b
MS
2647
2648 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2649 zlog_debug("Nexthop objects %ssupported on this kernel",
2650 supports_nh ? "" : "not ");
81505946 2651
60e0eaee 2652 return ret;
d9f5b2f5
SW
2653}
2654
2655
d62a17ae 2656int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
5895d33f 2657 int llalen, ns_id_t ns_id)
6b8a5694 2658{
d62a17ae 2659 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
5895d33f 2660 addr, lla, llalen, ns_id);
6b8a5694 2661}
718e3744 2662
340845e2 2663/**
0be6e7d7
JU
2664 * netlink_neigh_update_msg_encode() - Common helper api for encoding
2665 * evpn neighbor update as netlink messages using dataplane context object.
bbd4285b 2666 * Here, a neighbor refers to a bridge forwarding database entry for
2667 * either unicast forwarding or head-end replication or an IP neighbor
2668 * entry.
340845e2
JU
2669 * @ctx: Dataplane context
2670 * @cmd: Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH)
2671 * @mac: A neighbor cache link layer address
2672 * @ip: A neighbor cache n/w layer destination address
bbd4285b 2673 * In the case of bridge FDB, this represnts the remote
2674 * VTEP IP.
340845e2
JU
2675 * @replace_obj: Whether NEW request should replace existing object or
2676 * add to the end of the list
2677 * @family: AF_* netlink family
2678 * @type: RTN_* route type
2679 * @flags: NTF_* flags
2680 * @state: NUD_* states
d4d4ec1c
RZ
2681 * @data: data buffer pointer
2682 * @datalen: total amount of data buffer space
340845e2 2683 *
0be6e7d7
JU
2684 * Return: 0 when the msg doesn't fit entirely in the buffer
2685 * otherwise the number of bytes written to buf.
13d60d35 2686 */
0be6e7d7
JU
2687static ssize_t netlink_neigh_update_msg_encode(
2688 const struct zebra_dplane_ctx *ctx, int cmd, const struct ethaddr *mac,
2689 const struct ipaddr *ip, bool replace_obj, uint8_t family, uint8_t type,
506efd37 2690 uint8_t flags, uint16_t state, uint32_t nhg_id,
f188e68e 2691 bool nfy, uint8_t nfy_flags,
506efd37 2692 void *data, size_t datalen)
13d60d35 2693{
f3dbec60 2694 uint8_t protocol = RTPROT_ZEBRA;
d62a17ae 2695 struct {
2696 struct nlmsghdr n;
2697 struct ndmsg ndm;
d4d4ec1c
RZ
2698 char buf[];
2699 } *req = data;
340845e2
JU
2700 int ipa_len;
2701 enum dplane_op_e op;
d62a17ae 2702
0be6e7d7
JU
2703 if (datalen < sizeof(*req))
2704 return 0;
45c80fbd 2705 memset(req, 0, sizeof(*req));
d62a17ae 2706
340845e2
JU
2707 op = dplane_ctx_get_op(ctx);
2708
d4d4ec1c
RZ
2709 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2710 req->n.nlmsg_flags = NLM_F_REQUEST;
d62a17ae 2711 if (cmd == RTM_NEWNEIGH)
d4d4ec1c 2712 req->n.nlmsg_flags |=
340845e2
JU
2713 NLM_F_CREATE
2714 | (replace_obj ? NLM_F_REPLACE : NLM_F_APPEND);
d4d4ec1c
RZ
2715 req->n.nlmsg_type = cmd;
2716 req->ndm.ndm_family = family;
2717 req->ndm.ndm_type = type;
2718 req->ndm.ndm_state = state;
2719 req->ndm.ndm_flags = flags;
2720 req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
d62a17ae 2721
45c80fbd 2722 if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol,
0be6e7d7
JU
2723 sizeof(protocol)))
2724 return 0;
2725
2726 if (mac) {
2727 if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, mac, 6))
2728 return 0;
2729 }
13d60d35 2730
506efd37
AK
2731 if (nhg_id) {
2732 if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id))
2733 return 0;
2734 }
f188e68e 2735 if (nfy) {
b169fd6f 2736 if (!nl_attr_put(&req->n, datalen, NDA_NOTIFY,
f188e68e
AK
2737 &nfy_flags, sizeof(nfy_flags)))
2738 return 0;
2739 }
506efd37 2740
340845e2 2741 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
0be6e7d7
JU
2742 if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr, ipa_len))
2743 return 0;
340845e2
JU
2744
2745 if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) {
2746 vlanid_t vid = dplane_ctx_mac_get_vlan(ctx);
13d60d35 2747
0be6e7d7
JU
2748 if (vid > 0) {
2749 if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid))
2750 return 0;
2751 }
13d60d35 2752
0be6e7d7
JU
2753 if (!nl_attr_put32(&req->n, datalen, NDA_MASTER,
2754 dplane_ctx_mac_get_br_ifindex(ctx)))
2755 return 0;
340845e2 2756 }
13d60d35 2757
d4d4ec1c 2758 return NLMSG_ALIGN(req->n.nlmsg_len);
13d60d35 2759}
2760
340845e2
JU
2761/*
2762 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
2763 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
2764 */
2765static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx,
2766 int cmd)
2767{
2768 struct ethaddr dst_mac = {.octet = {0}};
d4d4ec1c 2769 uint8_t nl_pkt[NL_PKT_BUF_SIZE];
340845e2 2770
0be6e7d7
JU
2771 if (netlink_neigh_update_msg_encode(
2772 ctx, cmd, &dst_mac, dplane_ctx_neigh_get_ipaddr(ctx), false,
506efd37 2773 PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT),
f188e68e
AK
2774 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/,
2775 nl_pkt, sizeof(nl_pkt))
0be6e7d7
JU
2776 <= 0)
2777 return -1;
d4d4ec1c
RZ
2778
2779 return netlink_talk_info(netlink_talk_filter,
2780 (struct nlmsghdr *)nl_pkt,
2781 dplane_ctx_get_ns(ctx), 0);
340845e2
JU
2782}
2783
/*
 * Address of the first attribute of a neighbor message: the aligned end of
 * the struct ndmsg header that 'ndm_ptr' points to. Only defined here when
 * no earlier definition exists.
 */
#ifndef NDA_RTA
#define NDA_RTA(ndm_ptr)                                                       \
	((struct rtattr *)((char *)(ndm_ptr)                                   \
			   + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
2788
2414abd3 2789static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 2790{
d62a17ae 2791 struct ndmsg *ndm;
2792 struct interface *ifp;
2793 struct zebra_if *zif;
d62a17ae 2794 struct rtattr *tb[NDA_MAX + 1];
2795 struct interface *br_if;
2796 struct ethaddr mac;
2797 vlanid_t vid = 0;
4b3f26f4 2798 struct in_addr vtep_ip;
d62a17ae 2799 int vid_present = 0, dst_present = 0;
2800 char buf[ETHER_ADDR_STRLEN];
2801 char vid_buf[20];
2802 char dst_buf[30];
a37f4598 2803 bool sticky;
f188e68e
AK
2804 bool local_inactive = false;
2805 bool dp_static = false;
2806 uint32_t nhg_id = 0;
d62a17ae 2807
2808 ndm = NLMSG_DATA(h);
2809
2853fed6 2810 /* We only process macfdb notifications if EVPN is enabled */
2811 if (!is_evpn_enabled())
2812 return 0;
2813
4b3f26f4 2814 /* Parse attributes and extract fields of interest. Do basic
2815 * validation of the fields.
2816 */
2817 memset(tb, 0, sizeof tb);
d62a17ae 2818 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2819
2820 if (!tb[NDA_LLADDR]) {
28bd0652 2821 if (IS_ZEBRA_DEBUG_KERNEL)
4b3f26f4 2822 zlog_debug("%s AF_BRIDGE IF %u - no LLADDR",
28bd0652 2823 nl_msg_type_to_str(h->nlmsg_type),
4b3f26f4 2824 ndm->ndm_ifindex);
d62a17ae 2825 return 0;
2826 }
2827
ff8b7eb8 2828 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
2829 if (IS_ZEBRA_DEBUG_KERNEL)
2830 zlog_debug(
4b3f26f4 2831 "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu",
2832 nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex,
28bd0652 2833 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
d62a17ae 2834 return 0;
2835 }
2836
ff8b7eb8 2837 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 2838
2839 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
2840 vid_present = 1;
d7c0a89a 2841 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
772270f3 2842 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
d62a17ae 2843 }
2844
2845 if (tb[NDA_DST]) {
2846 /* TODO: Only IPv4 supported now. */
2847 dst_present = 1;
4b3f26f4 2848 memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]),
d62a17ae 2849 IPV4_MAX_BYTELEN);
772270f3
QY
2850 snprintf(dst_buf, sizeof(dst_buf), " dst %s",
2851 inet_ntoa(vtep_ip));
d62a17ae 2852 }
2853
f188e68e
AK
2854 if (tb[NDA_NH_ID])
2855 nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]);
2856
2857 if (ndm->ndm_state & NUD_STALE)
2858 local_inactive = true;
2859
2860 if (tb[NDA_NOTIFY]) {
2861 uint8_t nfy_flags;
2862
2863 dp_static = true;
2864 nfy_flags = *(uint8_t *)RTA_DATA(tb[NDA_NOTIFY]);
2865 /* local activity has not been detected on the entry */
2866 if (nfy_flags & (1 << BR_FDB_NFY_INACTIVE))
2867 local_inactive = true;
2868 }
2869
d62a17ae 2870 if (IS_ZEBRA_DEBUG_KERNEL)
f188e68e 2871 zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %s%s nhg %d",
d62a17ae 2872 nl_msg_type_to_str(h->nlmsg_type),
d62a17ae 2873 ndm->ndm_ifindex, vid_present ? vid_buf : "",
4b3f26f4 2874 ndm->ndm_state, ndm->ndm_flags,
d62a17ae 2875 prefix_mac2str(&mac, buf, sizeof(buf)),
f188e68e 2876 dst_present ? dst_buf : "", nhg_id);
d62a17ae 2877
4b3f26f4 2878 /* The interface should exist. */
2879 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
2880 ndm->ndm_ifindex);
2881 if (!ifp || !ifp->info)
2882 return 0;
2883
2884 /* The interface should be something we're interested in. */
2885 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
2886 return 0;
2887
2888 zif = (struct zebra_if *)ifp->info;
2889 if ((br_if = zif->brslave_info.br_if) == NULL) {
2890 if (IS_ZEBRA_DEBUG_KERNEL)
2891 zlog_debug(
2892 "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master",
2893 nl_msg_type_to_str(h->nlmsg_type), ifp->name,
2894 ndm->ndm_ifindex,
2895 zif->brslave_info.bridge_ifindex);
2896 return 0;
2897 }
2898
f188e68e 2899 sticky = !!(ndm->ndm_flags & NTF_STICKY);
4b3f26f4 2900
28bd0652
DS
2901 if (filter_vlan && vid != filter_vlan) {
2902 if (IS_ZEBRA_DEBUG_KERNEL)
d6951e5e 2903 zlog_debug(" Filtered due to filter vlan: %d",
28bd0652 2904 filter_vlan);
d62a17ae 2905 return 0;
28bd0652 2906 }
d62a17ae 2907
2908 /* If add or update, do accordingly if learnt on a "local" interface; if
2909 * the notification is over VxLAN, this has to be related to
2910 * multi-homing,
2911 * so perform an implicit delete of any local entry (if it exists).
2912 */
2913 if (h->nlmsg_type == RTM_NEWNEIGH) {
4b3f26f4 2914 /* Drop "permanent" entries. */
2915 if (ndm->ndm_state & NUD_PERMANENT) {
2916 if (IS_ZEBRA_DEBUG_KERNEL)
d6951e5e
DL
2917 zlog_debug(
2918 " Dropping entry because of NUD_PERMANENT");
2919 return 0;
4b3f26f4 2920 }
2921
d62a17ae 2922 if (IS_ZEBRA_IF_VXLAN(ifp))
2923 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
2924 vid);
2925
2926 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
f188e68e 2927 sticky, local_inactive, dp_static);
d62a17ae 2928 }
2929
2930 /* This is a delete notification.
4b3f26f4 2931 * Ignore the notification with IP dest as it may just signify that the
2932 * MAC has moved from remote to local. The exception is the special
2933 * all-zeros MAC that represents the BUM flooding entry; we may have
2934 * to readd it. Otherwise,
d62a17ae 2935 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
2936 * 2. For a MAC over "local" interface, delete the mac
2937 * Note: We will get notifications from both bridge driver and VxLAN
2938 * driver.
d62a17ae 2939 */
f188e68e
AK
2940 if (nhg_id)
2941 return 0;
2942
28bd0652 2943 if (dst_present) {
4b3f26f4 2944 u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2945
2946 if (!memcmp(zero_mac, mac.octet, ETH_ALEN))
2947 return zebra_vxlan_check_readd_vtep(ifp, vtep_ip);
d62a17ae 2948 return 0;
28bd0652 2949 }
d62a17ae 2950
2951 if (IS_ZEBRA_IF_VXLAN(ifp))
2952 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
2953 vid);
2954
2955 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
2232a77c 2956}
2957
2414abd3 2958static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 2959{
d62a17ae 2960 int len;
2961 struct ndmsg *ndm;
2232a77c 2962
d62a17ae 2963 if (h->nlmsg_type != RTM_NEWNEIGH)
2964 return 0;
2232a77c 2965
d62a17ae 2966 /* Length validity. */
2967 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2968 if (len < 0)
2969 return -1;
2232a77c 2970
d62a17ae 2971 /* We are interested only in AF_BRIDGE notifications. */
2972 ndm = NLMSG_DATA(h);
2973 if (ndm->ndm_family != AF_BRIDGE)
2974 return 0;
2232a77c 2975
2414abd3 2976 return netlink_macfdb_change(h, len, ns_id);
2232a77c 2977}
2978
2979/* Request for MAC FDB information from the kernel */
85a75f1e
MS
2980static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
2981 int type, ifindex_t master_ifindex)
2232a77c 2982{
d62a17ae 2983 struct {
2984 struct nlmsghdr n;
2985 struct ifinfomsg ifm;
2986 char buf[256];
2987 } req;
2988
2989 /* Form the request, specifying filter (rtattr) if needed. */
2990 memset(&req, 0, sizeof(req));
2991 req.n.nlmsg_type = type;
718f9b0f 2992 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 2993 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
2994 req.ifm.ifi_family = family;
2995 if (master_ifindex)
312a6bee 2996 nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
d62a17ae 2997
fd3f8e52 2998 return netlink_request(netlink_cmd, &req);
2232a77c 2999}
3000
3001/*
3002 * MAC forwarding database read using netlink interface. This is invoked
3003 * at startup.
3004 */
d62a17ae 3005int netlink_macfdb_read(struct zebra_ns *zns)
2232a77c 3006{
d62a17ae 3007 int ret;
85a75f1e
MS
3008 struct zebra_dplane_info dp_info;
3009
3010 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 3011
3012 /* Get bridge FDB table. */
85a75f1e
MS
3013 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3014 0);
d62a17ae 3015 if (ret < 0)
3016 return ret;
3017 /* We are reading entire table. */
3018 filter_vlan = 0;
85a75f1e
MS
3019 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3020 &dp_info, 0, 1);
d62a17ae 3021
3022 return ret;
2232a77c 3023}
3024
3025/*
3026 * MAC forwarding database read using netlink interface. This is for a
3027 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
3028 */
d62a17ae 3029int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
3030 struct interface *br_if)
2232a77c 3031{
d62a17ae 3032 struct zebra_if *br_zif;
3033 struct zebra_if *zif;
3034 struct zebra_l2info_vxlan *vxl;
85a75f1e 3035 struct zebra_dplane_info dp_info;
d62a17ae 3036 int ret = 0;
3037
85a75f1e 3038 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 3039
3040 /* Save VLAN we're filtering on, if needed. */
3041 br_zif = (struct zebra_if *)br_if->info;
3042 zif = (struct zebra_if *)ifp->info;
3043 vxl = &zif->l2info.vxl;
3044 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
3045 filter_vlan = vxl->access_vlan;
3046
3047 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3048 */
85a75f1e 3049 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
d62a17ae 3050 br_if->ifindex);
3051 if (ret < 0)
3052 return ret;
85a75f1e
MS
3053 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3054 &dp_info, 0, 0);
d62a17ae 3055
3056 /* Reset VLAN filter. */
3057 filter_vlan = 0;
3058 return ret;
2232a77c 3059}
3060
67fb9374
CS
3061
3062/* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
3063static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
3064 int family,
3065 int type,
3066 struct interface *br_if,
3067 struct ethaddr *mac,
3068 vlanid_t vid)
3069{
3070 struct {
3071 struct nlmsghdr n;
3072 struct ndmsg ndm;
3073 char buf[256];
3074 } req;
3075 struct zebra_if *br_zif;
3076 char buf[ETHER_ADDR_STRLEN];
3077
3078 memset(&req, 0, sizeof(req));
3079 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3080 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3081 req.n.nlmsg_flags = NLM_F_REQUEST;
3082 req.ndm.ndm_family = family; /* AF_BRIDGE */
3083 /* req.ndm.ndm_state = NUD_REACHABLE; */
3084
312a6bee 3085 nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
67fb9374
CS
3086
3087 br_zif = (struct zebra_if *)br_if->info;
3088 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
312a6bee 3089 nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid);
67fb9374 3090
312a6bee 3091 nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
67fb9374
CS
3092
3093 if (IS_ZEBRA_DEBUG_KERNEL)
bd47f3a3
JU
3094 zlog_debug(
3095 "%s: Tx family %s IF %s(%u) vrf %s(%u) MAC %s vid %u",
3096 __func__, nl_family_to_str(req.ndm.ndm_family),
3097 br_if->name, br_if->ifindex,
3098 vrf_id_to_name(br_if->vrf_id), br_if->vrf_id,
3099 prefix_mac2str(mac, buf, sizeof(buf)), vid);
67fb9374 3100
fd3f8e52 3101 return netlink_request(&zns->netlink_cmd, &req);
67fb9374
CS
3102}
3103
3104int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
3105 struct interface *br_if,
3106 struct ethaddr *mac, vlanid_t vid)
3107{
3108 int ret = 0;
3109 struct zebra_dplane_info dp_info;
3110
3111 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3112
3113 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3114 */
3115 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
3116 RTM_GETNEIGH,
3117 br_if, mac, vid);
3118 if (ret < 0)
3119 return ret;
3120
3121 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3122 &dp_info, 1, 0);
3123
3124 return ret;
3125}
036d93c0
MS
3126
3127/*
3128 * Netlink-specific handler for MAC updates using dataplane context object.
3129 */
d4d4ec1c
RZ
3130ssize_t
3131netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data,
3132 size_t datalen)
2232a77c 3133{
340845e2 3134 struct ipaddr vtep_ip;
036d93c0 3135 vlanid_t vid;
d4d4ec1c
RZ
3136 ssize_t total;
3137 int cmd;
340845e2
JU
3138 uint8_t flags;
3139 uint16_t state;
506efd37 3140 uint32_t nhg_id;
f188e68e
AK
3141 uint32_t update_flags;
3142 bool nfy = false;
3143 uint8_t nfy_flags = 0;
d4d4ec1c
RZ
3144
3145 cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL
3146 ? RTM_NEWNEIGH : RTM_DELNEIGH;
036d93c0 3147
f188e68e 3148 flags = NTF_MASTER;
340845e2 3149 state = NUD_REACHABLE;
d62a17ae 3150
f188e68e
AK
3151 update_flags = dplane_ctx_mac_get_update_flags(ctx);
3152 if (update_flags & DPLANE_MAC_REMOTE) {
3153 flags |= NTF_SELF;
3154 if (dplane_ctx_mac_is_sticky(ctx))
3155 flags |= NTF_STICKY;
3156 else
3157 flags |= NTF_EXT_LEARNED;
3158 /* if it was static-local previously we need to clear the
3159 * notify flags on replace with remote
3160 */
3161 if (update_flags & DPLANE_MAC_WAS_STATIC)
3162 nfy = true;
3163 } else {
3164 /* local mac */
3165 if (update_flags & DPLANE_MAC_SET_STATIC) {
3166 nfy_flags |= (1 << BR_FDB_NFY_STATIC);
3167 state |= NUD_NOARP;
3168 }
3169
3170 if (update_flags & DPLANE_MAC_SET_INACTIVE)
3171 nfy_flags |= (1 << BR_FDB_NFY_INACTIVE);
3172
3173 nfy = true;
3174 }
478566d6 3175
506efd37 3176 nhg_id = dplane_ctx_mac_get_nhg_id(ctx);
340845e2
JU
3177 vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx));
3178 SET_IPADDR_V4(&vtep_ip);
d62a17ae 3179
036d93c0
MS
3180 if (IS_ZEBRA_DEBUG_KERNEL) {
3181 char ipbuf[PREFIX_STRLEN];
3182 char buf[ETHER_ADDR_STRLEN];
478566d6 3183 char vid_buf[20];
506efd37 3184 const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
478566d6 3185
340845e2
JU
3186 vid = dplane_ctx_mac_get_vlan(ctx);
3187 if (vid > 0)
478566d6
MS
3188 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
3189 else
3190 vid_buf[0] = '\0';
036d93c0 3191
f188e68e 3192 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s dst %s nhg %u%s%s%s%s%s",
340845e2 3193 nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE),
478566d6
MS
3194 dplane_ctx_get_ifname(ctx),
3195 dplane_ctx_get_ifindex(ctx), vid_buf,
036d93c0 3196 dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
340845e2 3197 prefix_mac2str(mac, buf, sizeof(buf)),
506efd37 3198 ipaddr2str(&vtep_ip, ipbuf, sizeof(ipbuf)),
f188e68e
AK
3199 nhg_id,
3200 (update_flags &
3201 DPLANE_MAC_REMOTE) ? " rem" : "",
3202 (update_flags &
3203 DPLANE_MAC_WAS_STATIC) ? " clr_sync" : "",
3204 (update_flags &
3205 DPLANE_MAC_SET_STATIC) ? " static" : "",
3206 (update_flags &
3207 DPLANE_MAC_SET_INACTIVE) ? " inactive" : "",
3208 (nfy &
3209 DPLANE_MAC_SET_INACTIVE) ? " nfy" : "");
036d93c0 3210 }
d62a17ae 3211
0be6e7d7
JU
3212 total = netlink_neigh_update_msg_encode(
3213 ctx, cmd, dplane_ctx_mac_get_addr(ctx), &vtep_ip, true,
f188e68e
AK
3214 AF_BRIDGE, 0, flags, state, nhg_id, nfy, nfy_flags,
3215 data, datalen);
d4d4ec1c
RZ
3216
3217 return total;
2232a77c 3218}
3219
f17b99ed
DS
3220/*
3221 * In the event the kernel deletes ipv4 link-local neighbor entries created for
3222 * 5549 support, re-install them.
3223 */
3224static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
9b036974
DS
3225 struct interface *ifp, struct ipaddr *ip,
3226 bool handle_failed)
f17b99ed
DS
3227{
3228 if (ndm->ndm_family != AF_INET)
3229 return;
3230
3231 if (!zif->v6_2_v4_ll_neigh_entry)
3232 return;
3233
3234 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
3235 return;
3236
9b036974
DS
3237 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
3238 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
3239 ifp->name);
3240 return;
3241 }
3242
f17b99ed
DS
3243 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
3244}
3245
/* Neighbor states that netlink_ipneigh_change() treats as an add/update. */
#define NUD_VALID                                                              \
	(NUD_REACHABLE | NUD_STALE | NUD_DELAY | NUD_PROBE | NUD_NOARP         \
	 | NUD_PERMANENT)
/* States of a valid entry for which it is not reported local-inactive. */
#define NUD_LOCAL_ACTIVE                                                       \
	(NUD_REACHABLE | NUD_NOARP | NUD_PERMANENT)
2232a77c 3251
2414abd3 3252static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 3253{
d62a17ae 3254 struct ndmsg *ndm;
3255 struct interface *ifp;
3256 struct zebra_if *zif;
d62a17ae 3257 struct rtattr *tb[NDA_MAX + 1];
3258 struct interface *link_if;
3259 struct ethaddr mac;
3260 struct ipaddr ip;
bd47f3a3 3261 struct vrf *vrf;
d62a17ae 3262 char buf[ETHER_ADDR_STRLEN];
3263 char buf2[INET6_ADDRSTRLEN];
3264 int mac_present = 0;
a37f4598 3265 bool is_ext;
3266 bool is_router;
f188e68e 3267 bool local_inactive;
d62a17ae 3268
3269 ndm = NLMSG_DATA(h);
3270
3271 /* The interface should exist. */
5895d33f 3272 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
d62a17ae 3273 ndm->ndm_ifindex);
2853fed6 3274 if (!ifp || !ifp->info)
d62a17ae 3275 return 0;
3276
bd47f3a3 3277 vrf = vrf_lookup_by_id(ifp->vrf_id);
20089ae2
DS
3278 zif = (struct zebra_if *)ifp->info;
3279
3280 /* Parse attributes and extract fields of interest. */
0d6f7fd6 3281 memset(tb, 0, sizeof(tb));
20089ae2
DS
3282 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
3283
3284 if (!tb[NDA_DST]) {
bd47f3a3 3285 zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST",
9df414fe
QY
3286 nl_msg_type_to_str(h->nlmsg_type),
3287 nl_family_to_str(ndm->ndm_family), ifp->name,
bd47f3a3 3288 ndm->ndm_ifindex, VRF_LOGNAME(vrf), ifp->vrf_id);
d62a17ae 3289 return 0;
20089ae2
DS
3290 }
3291
3292 memset(&ip, 0, sizeof(struct ipaddr));
3293 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
3294 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
3295
f17b99ed
DS
3296 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
3297 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
9b036974 3298 netlink_handle_5549(ndm, zif, ifp, &ip, false);
28bd0652
DS
3299 if (IS_ZEBRA_DEBUG_KERNEL)
3300 zlog_debug(
3301 "\tNeighbor Entry Received is a 5549 entry, finished");
20089ae2
DS
3302 return 0;
3303 }
d62a17ae 3304
f17b99ed 3305 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
9b036974
DS
3306 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
3307 netlink_handle_5549(ndm, zif, ifp, &ip, true);
f17b99ed 3308
d62a17ae 3309 /* The neighbor is present on an SVI. From this, we locate the
3310 * underlying
3311 * bridge because we're only interested in neighbors on a VxLAN bridge.
3312 * The bridge is located based on the nature of the SVI:
3313 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
3314 * interface
3315 * and is linked to the bridge
3316 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
3317 * inteface
3318 * itself
3319 */
3320 if (IS_ZEBRA_IF_VLAN(ifp)) {
5895d33f 3321 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
71349e03 3322 zif->link_ifindex);
d62a17ae 3323 if (!link_if)
3324 return 0;
3325 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
3326 link_if = ifp;
28bd0652
DS
3327 else {
3328 if (IS_ZEBRA_DEBUG_KERNEL)
3329 zlog_debug(
3330 "\tNeighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
d62a17ae 3331 return 0;
28bd0652 3332 }
d62a17ae 3333
d62a17ae 3334 memset(&mac, 0, sizeof(struct ethaddr));
d62a17ae 3335 if (h->nlmsg_type == RTM_NEWNEIGH) {
3336 if (tb[NDA_LLADDR]) {
ff8b7eb8 3337 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
3338 if (IS_ZEBRA_DEBUG_KERNEL)
3339 zlog_debug(
bd47f3a3 3340 "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu",
28bd0652
DS
3341 nl_msg_type_to_str(
3342 h->nlmsg_type),
3343 nl_family_to_str(
3344 ndm->ndm_family),
3345 ifp->name, ndm->ndm_ifindex,
bd47f3a3 3346 VRF_LOGNAME(vrf), ifp->vrf_id,
28bd0652
DS
3347 (unsigned long)RTA_PAYLOAD(
3348 tb[NDA_LLADDR]));
d62a17ae 3349 return 0;
3350 }
3351
3352 mac_present = 1;
ff8b7eb8 3353 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 3354 }
3355
a37f4598 3356 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
3357 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
d62a17ae 3358
3359 if (IS_ZEBRA_DEBUG_KERNEL)
3360 zlog_debug(
bd47f3a3 3361 "Rx %s family %s IF %s(%u) vrf %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
d62a17ae 3362 nl_msg_type_to_str(h->nlmsg_type),
3363 nl_family_to_str(ndm->ndm_family), ifp->name,
bd47f3a3 3364 ndm->ndm_ifindex, VRF_LOGNAME(vrf), ifp->vrf_id,
d62a17ae 3365 ipaddr2str(&ip, buf2, sizeof(buf2)),
3366 mac_present
3367 ? prefix_mac2str(&mac, buf, sizeof(buf))
3368 : "",
3369 ndm->ndm_state, ndm->ndm_flags);
3370
3371 /* If the neighbor state is valid for use, process as an add or
3372 * update
3373 * else process as a delete. Note that the delete handling may
3374 * result
3375 * in re-adding the neighbor if it is a valid "remote" neighbor.
3376 */
f188e68e
AK
3377 if (ndm->ndm_state & NUD_VALID) {
3378 local_inactive = !(ndm->ndm_state & NUD_LOCAL_ACTIVE);
3379
3380 /* XXX - populate dp-static based on the sync flags
3381 * in the kernel
3382 */
ee69da27 3383 return zebra_vxlan_handle_kernel_neigh_update(
d62a17ae 3384 ifp, link_if, &ip, &mac, ndm->ndm_state,
f188e68e
AK
3385 is_ext, is_router, local_inactive,
3386 false /* dp_static */);
3387 }
d62a17ae 3388
ee69da27 3389 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
d62a17ae 3390 }
3391
3392 if (IS_ZEBRA_DEBUG_KERNEL)
bd47f3a3 3393 zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %s",
d62a17ae 3394 nl_msg_type_to_str(h->nlmsg_type),
3395 nl_family_to_str(ndm->ndm_family), ifp->name,
bd47f3a3 3396 ndm->ndm_ifindex, VRF_LOGNAME(vrf), ifp->vrf_id,
d62a17ae 3397 ipaddr2str(&ip, buf2, sizeof(buf2)));
3398
3399 /* Process the delete - it may result in re-adding the neighbor if it is
3400 * a valid "remote" neighbor.
3401 */
ee69da27 3402 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
2232a77c 3403}
3404
2414abd3 3405static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 3406{
d62a17ae 3407 int len;
3408 struct ndmsg *ndm;
2232a77c 3409
d62a17ae 3410 if (h->nlmsg_type != RTM_NEWNEIGH)
3411 return 0;
2232a77c 3412
d62a17ae 3413 /* Length validity. */
3414 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3415 if (len < 0)
3416 return -1;
2232a77c 3417
d62a17ae 3418 /* We are interested only in AF_INET or AF_INET6 notifications. */
3419 ndm = NLMSG_DATA(h);
3420 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
3421 return 0;
2232a77c 3422
2414abd3 3423 return netlink_neigh_change(h, len);
2232a77c 3424}
3425
3426/* Request for IP neighbor information from the kernel */
85a75f1e
MS
3427static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
3428 int type, ifindex_t ifindex)
2232a77c 3429{
d62a17ae 3430 struct {
3431 struct nlmsghdr n;
3432 struct ndmsg ndm;
3433 char buf[256];
3434 } req;
3435
3436 /* Form the request, specifying filter (rtattr) if needed. */
3437 memset(&req, 0, sizeof(req));
3438 req.n.nlmsg_type = type;
718f9b0f 3439 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 3440 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3441 req.ndm.ndm_family = family;
3442 if (ifindex)
312a6bee 3443 nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
d62a17ae 3444
fd3f8e52 3445 return netlink_request(netlink_cmd, &req);
2232a77c 3446}
3447
3448/*
3449 * IP Neighbor table read using netlink interface. This is invoked
3450 * at startup.
3451 */
d62a17ae 3452int netlink_neigh_read(struct zebra_ns *zns)
2232a77c 3453{
d62a17ae 3454 int ret;
85a75f1e
MS
3455 struct zebra_dplane_info dp_info;
3456
3457 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 3458
d62a17ae 3459 /* Get IP neighbor table. */
85a75f1e
MS
3460 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
3461 0);
d62a17ae 3462 if (ret < 0)
3463 return ret;
85a75f1e
MS
3464 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3465 &dp_info, 0, 1);
2232a77c 3466
d62a17ae 3467 return ret;
2232a77c 3468}
3469
3470/*
3471 * IP Neighbor table read using netlink interface. This is for a specific
3472 * VLAN device.
3473 */
d62a17ae 3474int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2232a77c 3475{
d62a17ae 3476 int ret = 0;
85a75f1e
MS
3477 struct zebra_dplane_info dp_info;
3478
3479 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 3480
85a75f1e 3481 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
d62a17ae 3482 vlan_if->ifindex);
3483 if (ret < 0)
3484 return ret;
85a75f1e
MS
3485 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3486 &dp_info, 0, 0);
2232a77c 3487
d62a17ae 3488 return ret;
2232a77c 3489}
3490
67fb9374
CS
3491/*
3492 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
3493 * read using netlink interface.
3494 */
3495static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
3496 int type, struct ipaddr *ip,
3497 ifindex_t ifindex)
3498{
3499 struct {
3500 struct nlmsghdr n;
3501 struct ndmsg ndm;
3502 char buf[256];
3503 } req;
3504 int ipa_len;
3505
3506 /* Form the request, specifying filter (rtattr) if needed. */
3507 memset(&req, 0, sizeof(req));
3508 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3509 req.n.nlmsg_flags = NLM_F_REQUEST;
3510 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3511 req.ndm.ndm_ifindex = ifindex;
3512
3513 if (IS_IPADDR_V4(ip)) {
3514 ipa_len = IPV4_MAX_BYTELEN;
3515 req.ndm.ndm_family = AF_INET;
3516
3517 } else {
3518 ipa_len = IPV6_MAX_BYTELEN;
3519 req.ndm.ndm_family = AF_INET6;
3520 }
3521
312a6bee 3522 nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
67fb9374 3523
7c26c121
CS
3524 if (IS_ZEBRA_DEBUG_KERNEL) {
3525 char buf[INET6_ADDRSTRLEN];
3526
3527 zlog_debug("%s: Tx %s family %s IF %u IP %s flags 0x%x",
3528 __func__, nl_msg_type_to_str(type),
3529 nl_family_to_str(req.ndm.ndm_family), ifindex,
3530 ipaddr2str(ip, buf, sizeof(buf)), req.n.nlmsg_flags);
3531 }
3532
fd3f8e52 3533 return netlink_request(&zns->netlink_cmd, &req);
67fb9374
CS
3534}
3535
3536int netlink_neigh_read_specific_ip(struct ipaddr *ip,
3537 struct interface *vlan_if)
3538{
3539 int ret = 0;
3540 struct zebra_ns *zns;
3541 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vlan_if->vrf_id);
3542 char buf[INET6_ADDRSTRLEN];
3543 struct zebra_dplane_info dp_info;
3544
3545 zns = zvrf->zns;
3546
3547 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3548
3549 if (IS_ZEBRA_DEBUG_KERNEL)
bd47f3a3 3550 zlog_debug("%s: neigh request IF %s(%u) IP %s vrf %s(%u)",
15569c58 3551 __func__, vlan_if->name, vlan_if->ifindex,
bd47f3a3
JU
3552 ipaddr2str(ip, buf, sizeof(buf)),
3553 vrf_id_to_name(vlan_if->vrf_id), vlan_if->vrf_id);
67fb9374
CS
3554
3555 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
3556 vlan_if->ifindex);
3557 if (ret < 0)
3558 return ret;
3559
3560 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3561 &dp_info, 1, 0);
3562
3563 return ret;
3564}
3565
2414abd3 3566int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
2232a77c 3567{
d62a17ae 3568 int len;
3569 struct ndmsg *ndm;
2232a77c 3570
d62a17ae 3571 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
3572 return 0;
2232a77c 3573
d62a17ae 3574 /* Length validity. */
3575 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
9bdf8618 3576 if (len < 0) {
15569c58
DA
3577 zlog_err(
3578 "%s: Message received from netlink is of a broken size %d %zu",
3579 __func__, h->nlmsg_len,
3580 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
d62a17ae 3581 return -1;
9bdf8618 3582 }
2232a77c 3583
d62a17ae 3584 /* Is this a notification for the MAC FDB or IP neighbor table? */
3585 ndm = NLMSG_DATA(h);
3586 if (ndm->ndm_family == AF_BRIDGE)
2414abd3 3587 return netlink_macfdb_change(h, len, ns_id);
2232a77c 3588
d62a17ae 3589 if (ndm->ndm_type != RTN_UNICAST)
3590 return 0;
2232a77c 3591
d62a17ae 3592 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2414abd3 3593 return netlink_ipneigh_change(h, len, ns_id);
8a1b681c 3594 else {
9df414fe 3595 flog_warn(
e914ccbe 3596 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
3597 "Invalid address family: %u received from kernel neighbor change: %s",
3598 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
3599 return 0;
3600 }
2232a77c 3601
d62a17ae 3602 return 0;
2232a77c 3603}
3604
931fa60c
MS
3605/*
3606 * Utility neighbor-update function, using info from dplane context.
3607 */
3608static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
3609 int cmd)
2232a77c 3610{
931fa60c
MS
3611 const struct ipaddr *ip;
3612 const struct ethaddr *mac;
3613 uint8_t flags;
3614 uint16_t state;
340845e2 3615 uint8_t family;
d4d4ec1c 3616 uint8_t nl_pkt[NL_PKT_BUF_SIZE];
d62a17ae 3617
931fa60c
MS
3618 ip = dplane_ctx_neigh_get_ipaddr(ctx);
3619 mac = dplane_ctx_neigh_get_mac(ctx);
3620 if (is_zero_mac(mac))
3621 mac = NULL;
3622
3623 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
3624 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
3625
340845e2 3626 family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
d62a17ae 3627
340845e2
JU
3628 if (IS_ZEBRA_DEBUG_KERNEL) {
3629 char buf[INET6_ADDRSTRLEN];
3630 char buf2[ETHER_ADDR_STRLEN];
d62a17ae 3631
340845e2
JU
3632 zlog_debug(
3633 "Tx %s family %s IF %s(%u) Neigh %s MAC %s flags 0x%x state 0x%x",
3634 nl_msg_type_to_str(cmd), nl_family_to_str(family),
3635 dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
3636 ipaddr2str(ip, buf, sizeof(buf)),
3637 mac ? prefix_mac2str(mac, buf2, sizeof(buf2)) : "null",
3638 flags, state);
3639 }
d62a17ae 3640
0be6e7d7 3641 if (netlink_neigh_update_msg_encode(ctx, cmd, mac, ip, true, family,
f188e68e
AK
3642 RTN_UNICAST, flags, state,
3643 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/,
3644 nl_pkt, sizeof(nl_pkt))
0be6e7d7
JU
3645 <= 0)
3646 return -1;
d4d4ec1c
RZ
3647
3648 return netlink_talk_info(netlink_talk_filter, (struct nlmsghdr *)nl_pkt,
3649 dplane_ctx_get_ns(ctx), 0);
2232a77c 3650}
3651
036d93c0
MS
3652/*
3653 * Update MAC, using dataplane context object.
3654 */
3655enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 3656{
d4d4ec1c
RZ
3657 uint8_t nl_pkt[NL_PKT_BUF_SIZE];
3658 ssize_t rv;
340845e2 3659
d4d4ec1c
RZ
3660 rv = netlink_macfdb_update_ctx(ctx, nl_pkt, sizeof(nl_pkt));
3661 if (rv <= 0)
3662 return ZEBRA_DPLANE_REQUEST_FAILURE;
3663
3664 rv = netlink_talk_info(netlink_talk_filter, (struct nlmsghdr *)nl_pkt,
3665 dplane_ctx_get_ns(ctx), 0);
3666
3667 return rv == 0 ?
3668 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE;
2232a77c 3669}
3670
931fa60c 3671enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 3672{
931fa60c 3673 int ret = -1;
2232a77c 3674
931fa60c
MS
3675 switch (dplane_ctx_get_op(ctx)) {
3676 case DPLANE_OP_NEIGH_INSTALL:
3677 case DPLANE_OP_NEIGH_UPDATE:
3678 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH);
3679 break;
3680 case DPLANE_OP_NEIGH_DELETE:
3681 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH);
3682 break;
0bbd4ff4
MS
3683 case DPLANE_OP_VTEP_ADD:
3684 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH);
3685 break;
3686 case DPLANE_OP_VTEP_DELETE:
3687 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH);
3688 break;
931fa60c
MS
3689 default:
3690 break;
3691 }
2232a77c 3692
931fa60c
MS
3693 return (ret == 0 ?
3694 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
6fe2b0e6
CS
3695}
3696
16c628de
MS
3697/*
3698 * MPLS label forwarding table change via netlink interface, using dataplane
3699 * context information.
3700 */
0be6e7d7
JU
3701ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
3702 void *buf, size_t buflen)
16c628de
MS
3703{
3704 mpls_lse_t lse;
ee70f629 3705 const struct nhlfe_list_head *head;
81793ac1 3706 const zebra_nhlfe_t *nhlfe;
16c628de
MS
3707 struct nexthop *nexthop = NULL;
3708 unsigned int nexthop_num;
3709 const char *routedesc;
3710 int route_type;
9a0132a5 3711 struct prefix p = {0};
16c628de
MS
3712
3713 struct {
3714 struct nlmsghdr n;
3715 struct rtmsg r;
0be6e7d7
JU
3716 char buf[0];
3717 } *req = buf;
3718
3719 if (buflen < sizeof(*req))
3720 return 0;
16c628de 3721
0be6e7d7 3722 memset(req, 0, sizeof(*req));
16c628de
MS
3723
3724 /*
3725 * Count # nexthops so we can decide whether to use singlepath
3726 * or multipath case.
3727 */
3728 nexthop_num = 0;
ee70f629
MS
3729 head = dplane_ctx_get_nhlfe_list(ctx);
3730 frr_each(nhlfe_list_const, head, nhlfe) {
16c628de
MS
3731 nexthop = nhlfe->nexthop;
3732 if (!nexthop)
3733 continue;
3734 if (cmd == RTM_NEWROUTE) {
3735 /* Count all selected NHLFEs */
3736 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3737 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
3738 nexthop_num++;
3739 } else { /* DEL */
3740 /* Count all installed NHLFEs */
3741 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
3742 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
3743 nexthop_num++;
3744 }
3745 }
3746
3747 if ((nexthop_num == 0) ||
3748 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
3749 return 0;
3750
0be6e7d7
JU
3751 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
3752 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
3753 req->n.nlmsg_type = cmd;
3754 req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
16c628de 3755
0be6e7d7
JU
3756 req->r.rtm_family = AF_MPLS;
3757 req->r.rtm_table = RT_TABLE_MAIN;
3758 req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
3759 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
3760 req->r.rtm_type = RTN_UNICAST;
16c628de
MS
3761
3762 if (cmd == RTM_NEWROUTE) {
3763 /* We do a replace to handle update. */
0be6e7d7 3764 req->n.nlmsg_flags |= NLM_F_REPLACE;
16c628de
MS
3765
3766 /* set the protocol value if installing */
3767 route_type = re_type_from_lsp_type(
3768 dplane_ctx_get_best_nhlfe(ctx)->type);
0be6e7d7 3769 req->r.rtm_protocol = zebra2proto(route_type);
16c628de
MS
3770 }
3771
3772 /* Fill destination */
3773 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
0be6e7d7
JU
3774 if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t)))
3775 return 0;
16c628de
MS
3776
3777 /* Fill nexthops (paths) based on single-path or multipath. The paths
3778 * chosen depend on the operation.
3779 */
fc608372 3780 if (nexthop_num == 1) {
16c628de
MS
3781 routedesc = "single-path";
3782 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
3783 routedesc);
3784
3785 nexthop_num = 0;
ee70f629 3786 frr_each(nhlfe_list_const, head, nhlfe) {
16c628de
MS
3787 nexthop = nhlfe->nexthop;
3788 if (!nexthop)
3789 continue;
3790
3791 if ((cmd == RTM_NEWROUTE
3792 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3793 && CHECK_FLAG(nexthop->flags,
3794 NEXTHOP_FLAG_ACTIVE)))
3795 || (cmd == RTM_DELROUTE
3796 && (CHECK_FLAG(nhlfe->flags,
3797 NHLFE_FLAG_INSTALLED)
3798 && CHECK_FLAG(nexthop->flags,
3799 NEXTHOP_FLAG_FIB)))) {
3800 /* Add the gateway */
0be6e7d7
JU
3801 if (!_netlink_mpls_build_singlepath(
3802 &p, routedesc, nhlfe, &req->n,
3803 &req->r, buflen, cmd))
3804 return false;
16c628de
MS
3805
3806 nexthop_num++;
3807 break;
3808 }
3809 }
3810 } else { /* Multipath case */
312a6bee 3811 struct rtattr *nest;
81793ac1 3812 const union g_addr *src1 = NULL;
16c628de 3813
0be6e7d7
JU
3814 nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH);
3815 if (!nest)
3816 return 0;
16c628de
MS
3817
3818 routedesc = "multipath";
3819 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
3820 routedesc);
3821
3822 nexthop_num = 0;
ee70f629 3823 frr_each(nhlfe_list_const, head, nhlfe) {
16c628de
MS
3824 nexthop = nhlfe->nexthop;
3825 if (!nexthop)
3826 continue;
3827
16c628de
MS
3828 if ((cmd == RTM_NEWROUTE
3829 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3830 && CHECK_FLAG(nexthop->flags,
3831 NEXTHOP_FLAG_ACTIVE)))
3832 || (cmd == RTM_DELROUTE
3833 && (CHECK_FLAG(nhlfe->flags,
3834 NHLFE_FLAG_INSTALLED)
3835 && CHECK_FLAG(nexthop->flags,
3836 NEXTHOP_FLAG_FIB)))) {
3837 nexthop_num++;
3838
3839 /* Build the multipath */
0be6e7d7
JU
3840 if (!_netlink_mpls_build_multipath(
3841 &p, routedesc, nhlfe, &req->n,
3842 buflen, &req->r, &src1))
3843 return 0;
16c628de
MS
3844 }
3845 }
3846
3847 /* Add the multipath */
0be6e7d7 3848 nl_attr_nest_end(&req->n, nest);
16c628de
MS
3849 }
3850
0be6e7d7 3851 return NLMSG_ALIGN(req->n.nlmsg_len);
16c628de 3852}
506efd37
AK
3853
3854/****************************************************************************
3855* This code was developed in a branch that didn't have dplane APIs for
3856* MAC updates. Hence the use of the legacy style. It will be moved to
3857* the new dplane style pre-merge to master. XXX
3858*/
3859static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip)
3860{
3861 struct {
3862 struct nlmsghdr n;
3863 struct nhmsg nhm;
3864 char buf[256];
3865 } req;
3866 int cmd = RTM_NEWNEXTHOP;
3867 struct zebra_vrf *zvrf;
3868 struct zebra_ns *zns;
3869
3870 zvrf = zebra_vrf_get_evpn();
3871 if (!zvrf)
3872 return -1;
3873 zns = zvrf->zns;
3874
3875 memset(&req, 0, sizeof(req));
3876
3877 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3878 req.n.nlmsg_flags = NLM_F_REQUEST;
3879 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
3880 req.n.nlmsg_type = cmd;
3881 req.nhm.nh_family = AF_INET;
3882
3883 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
3884 return -1;
3885 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
3886 return -1;
3887 if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY,
3888 &vtep_ip, IPV4_MAX_BYTELEN))
3889 return -1;
3890
3891 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3892 zlog_debug("Tx %s fdb-nh 0x%x %s",
3893 nl_msg_type_to_str(cmd), nh_id, inet_ntoa(vtep_ip));
3894 }
3895
3896 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
3897 0);
3898}
3899
3900static int netlink_fdb_nh_del(uint32_t nh_id)
3901{
3902 struct {
3903 struct nlmsghdr n;
3904 struct nhmsg nhm;
3905 char buf[256];
3906 } req;
3907 int cmd = RTM_DELNEXTHOP;
3908 struct zebra_vrf *zvrf;
3909 struct zebra_ns *zns;
3910
3911 zvrf = zebra_vrf_get_evpn();
3912 if (!zvrf)
3913 return -1;
3914 zns = zvrf->zns;
3915
3916 memset(&req, 0, sizeof(req));
3917
3918 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3919 req.n.nlmsg_flags = NLM_F_REQUEST;
3920 req.n.nlmsg_type = cmd;
3921 req.nhm.nh_family = AF_UNSPEC;
3922
3923 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
3924 return -1;
3925
3926 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3927 zlog_debug("Tx %s fdb-nh 0x%x",
3928 nl_msg_type_to_str(cmd), nh_id);
3929 }
3930
3931 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
3932 0);
3933}
3934
3935static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt,
3936 struct nh_grp *nh_ids)
3937{
3938 struct {
3939 struct nlmsghdr n;
3940 struct nhmsg nhm;
3941 char buf[256];
3942 } req;
3943 int cmd = RTM_NEWNEXTHOP;
3944 struct zebra_vrf *zvrf;
3945 struct zebra_ns *zns;
3946 struct nexthop_grp grp[nh_cnt];
3947 uint32_t i;
3948
3949 zvrf = zebra_vrf_get_evpn();
3950 if (!zvrf)
3951 return -1;
3952 zns = zvrf->zns;
3953
3954 memset(&req, 0, sizeof(req));
3955
3956 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3957 req.n.nlmsg_flags = NLM_F_REQUEST;
3958 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
3959 req.n.nlmsg_type = cmd;
3960 req.nhm.nh_family = AF_UNSPEC;
3961
3962 if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id))
3963 return -1;
3964 if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
3965 return -1;
3966 memset(&grp, 0, sizeof(grp));
3967 for (i = 0; i < nh_cnt; ++i) {
3968 grp[i].id = nh_ids[i].id;
3969 grp[i].weight = nh_ids[i].weight;
3970 }
3971 if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP,
3972 grp, nh_cnt * sizeof(struct nexthop_grp)))
3973 return -1;
3974
3975
3976 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3977 char vtep_str[ES_VTEP_LIST_STR_SZ];
3978
3979 vtep_str[0] = '\0';
3980 for (i = 0; i < nh_cnt; ++i) {
3981 sprintf(vtep_str + strlen(vtep_str), "0x%x ",
3982 grp[i].id);
3983 }
3984
3985 zlog_debug("Tx %s fdb-nhg 0x%x %s",
3986 nl_msg_type_to_str(cmd), nhg_id, vtep_str);
3987 }
3988
3989 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
3990 0);
3991}
3992
/* Delete an FDB nexthop group; same request as for a single nexthop id. */
static int netlink_fdb_nhg_del(uint32_t nhg_id)
{
	const int rc = netlink_fdb_nh_del(nhg_id);

	return rc;
}
3997
3998int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
3999{
4000 return netlink_fdb_nh_update(nh_id, vtep_ip);
4001}
4002
/* Entry point: forwards to netlink_fdb_nh_del() and returns its result. */
int kernel_del_mac_nh(uint32_t nh_id)
{
	const int rc = netlink_fdb_nh_del(nh_id);

	return rc;
}
4007
/* Entry point: forwards to netlink_fdb_nhg_update() and returns its result. */
int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
		       struct nh_grp *nh_ids)
{
	const int rc = netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids);

	return rc;
}
4013
/* Entry point: forwards to netlink_fdb_nhg_del() and returns its result. */
int kernel_del_mac_nhg(uint32_t nhg_id)
{
	const int rc = netlink_fdb_nhg_del(nhg_id);

	return rc;
}
4018
ddfeb486 4019#endif /* HAVE_NETLINK */