]> git.proxmox.com Git - mirror_frr.git/blame - zebra/rt_netlink.c
zebra: abstract route src determiniation into func
[mirror_frr.git] / zebra / rt_netlink.c
CommitLineData
718e3744 1/* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21#include <zebra.h>
ddfeb486
DL
22
23#ifdef HAVE_NETLINK
24
8ccc7e80 25#include <net/if_arp.h>
ba777396
RW
26#include <linux/lwtunnel.h>
27#include <linux/mpls_iptunnel.h>
28#include <linux/neighbour.h>
29#include <linux/rtnetlink.h>
d9f5b2f5 30#include <linux/nexthop.h>
718e3744 31
32/* Hack for GNU libc version 2. */
33#ifndef MSG_TRUNC
34#define MSG_TRUNC 0x20
35#endif /* MSG_TRUNC */
36
37#include "linklist.h"
38#include "if.h"
39#include "log.h"
40#include "prefix.h"
41#include "connected.h"
42#include "table.h"
26e2ae36 43#include "memory.h"
4a1ab8e4 44#include "zebra_memory.h"
718e3744 45#include "rib.h"
e04ab74d 46#include "thread.h"
edd7c245 47#include "privs.h"
fb018d25 48#include "nexthop.h"
78104b9b 49#include "vrf.h"
5e6a74d8 50#include "vty.h"
40c7bdb0 51#include "mpls.h"
13d60d35 52#include "vxlan.h"
8d03bc50 53#include "printfrr.h"
718e3744 54
bf094f69 55#include "zebra/zapi_msg.h"
fe18ee2d 56#include "zebra/zebra_ns.h"
7c551956 57#include "zebra/zebra_vrf.h"
6621ca86 58#include "zebra/rt.h"
718e3744 59#include "zebra/redistribute.h"
60#include "zebra/interface.h"
61#include "zebra/debug.h"
12f6fb97 62#include "zebra/rtadv.h"
567b877d 63#include "zebra/zebra_ptm.h"
40c7bdb0 64#include "zebra/zebra_mpls.h"
1fdc9eae 65#include "zebra/kernel_netlink.h"
66#include "zebra/rt_netlink.h"
d9f5b2f5 67#include "zebra/zebra_nhg.h"
e3be0432 68#include "zebra/zebra_mroute.h"
2232a77c 69#include "zebra/zebra_vxlan.h"
364fed6b 70#include "zebra/zebra_errors.h"
e3be0432 71
40c7bdb0 72#ifndef AF_MPLS
73#define AF_MPLS 28
74#endif
75
2232a77c 76static vlanid_t filter_vlan = 0;
77
7c99d51b
MS
78/* We capture whether the current kernel supports nexthop ids; by
79 * default, we'll use them if possible. There's also a configuration
80 * available to _disable_ use of kernel nexthops.
81 */
fec211ad 82static bool supports_nh;
81505946 83
d62a17ae 84struct gw_family_t {
d7c0a89a
QY
85 uint16_t filler;
86 uint16_t family;
d62a17ae 87 union g_addr gate;
40c7bdb0 88};
89
2b64873d
DL
90static const char ipv4_ll_buf[16] = "169.254.0.1";
91static struct in_addr ipv4_ll;
8755598a 92
7c99d51b
MS
93/* Helper to control use of kernel-level nexthop ids */
94static bool kernel_nexthops_supported(void)
95{
96 return (supports_nh && zebra_nhg_kernel_nexthops_enabled());
97}
98
8755598a
DS
99/*
100 * The ipv4_ll data structure is used for all 5549
101 * additions to the kernel. Let's figure out the
102 * correct value one time instead for every
103 * install/remove of a 5549 type route
104 */
d62a17ae 105void rt_netlink_init(void)
8755598a 106{
d62a17ae 107 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
8755598a
DS
108}
109
931fa60c
MS
110/*
111 * Mapping from dataplane neighbor flags to netlink flags
112 */
113static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
114{
115 uint8_t flags = 0;
116
117 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
118 flags |= NTF_EXT_LEARNED;
119 if (dplane_flags & DPLANE_NTF_ROUTER)
120 flags |= NTF_ROUTER;
121
122 return flags;
123}
124
125/*
126 * Mapping from dataplane neighbor state to netlink state
127 */
128static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
129{
130 uint16_t state = 0;
131
132 if (dplane_state & DPLANE_NUD_REACHABLE)
133 state |= NUD_REACHABLE;
134 if (dplane_state & DPLANE_NUD_STALE)
135 state |= NUD_STALE;
136 if (dplane_state & DPLANE_NUD_NOARP)
137 state |= NUD_NOARP;
138 if (dplane_state & DPLANE_NUD_PROBE)
139 state |= NUD_PROBE;
140
141 return state;
142}
143
144
6a6d11a3 145static inline bool is_selfroute(int proto)
23b1f334 146{
d62a17ae 147 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
d4d71f11 148 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
d62a17ae 149 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
150 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
915902cb 151 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
0761368a 152 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
da82f6b4 153 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)) {
6a6d11a3 154 return true;
d62a17ae 155 }
156
6a6d11a3 157 return false;
23b1f334
DD
158}
159
915902cb 160static inline int zebra2proto(int proto)
23b1f334 161{
d62a17ae 162 switch (proto) {
163 case ZEBRA_ROUTE_BABEL:
164 proto = RTPROT_BABEL;
165 break;
166 case ZEBRA_ROUTE_BGP:
167 proto = RTPROT_BGP;
168 break;
169 case ZEBRA_ROUTE_OSPF:
170 case ZEBRA_ROUTE_OSPF6:
171 proto = RTPROT_OSPF;
172 break;
173 case ZEBRA_ROUTE_STATIC:
d4d71f11 174 proto = RTPROT_ZSTATIC;
d62a17ae 175 break;
176 case ZEBRA_ROUTE_ISIS:
177 proto = RTPROT_ISIS;
178 break;
179 case ZEBRA_ROUTE_RIP:
180 proto = RTPROT_RIP;
181 break;
182 case ZEBRA_ROUTE_RIPNG:
183 proto = RTPROT_RIPNG;
184 break;
185 case ZEBRA_ROUTE_NHRP:
186 proto = RTPROT_NHRP;
187 break;
188 case ZEBRA_ROUTE_EIGRP:
189 proto = RTPROT_EIGRP;
190 break;
191 case ZEBRA_ROUTE_LDP:
192 proto = RTPROT_LDP;
193 break;
8a71d93d
DS
194 case ZEBRA_ROUTE_SHARP:
195 proto = RTPROT_SHARP;
196 break;
0761368a
DS
197 case ZEBRA_ROUTE_PBR:
198 proto = RTPROT_PBR;
199 break;
da82f6b4
CF
200 case ZEBRA_ROUTE_OPENFABRIC:
201 proto = RTPROT_OPENFABRIC;
202 break;
a56ec5c0 203 case ZEBRA_ROUTE_TABLE:
38e40db1 204 case ZEBRA_ROUTE_NHG:
a56ec5c0
DS
205 proto = RTPROT_ZEBRA;
206 break;
d62a17ae 207 default:
0761368a
DS
208 /*
209 * When a user adds a new protocol this will show up
210 * to let them know to do something about it. This
211 * is intentionally a warn because we should see
212 * this as part of development of a new protocol
213 */
9df414fe
QY
214 zlog_debug(
215 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
15569c58 216 __func__, proto);
d62a17ae 217 proto = RTPROT_ZEBRA;
218 break;
219 }
220
221 return proto;
23b1f334
DD
222}
223
38e40db1 224static inline int proto2zebra(int proto, int family, bool is_nexthop)
915902cb
DS
225{
226 switch (proto) {
227 case RTPROT_BABEL:
228 proto = ZEBRA_ROUTE_BABEL;
229 break;
230 case RTPROT_BGP:
231 proto = ZEBRA_ROUTE_BGP;
232 break;
233 case RTPROT_OSPF:
996c9314
LB
234 proto = (family == AFI_IP) ? ZEBRA_ROUTE_OSPF
235 : ZEBRA_ROUTE_OSPF6;
915902cb
DS
236 break;
237 case RTPROT_ISIS:
238 proto = ZEBRA_ROUTE_ISIS;
239 break;
240 case RTPROT_RIP:
241 proto = ZEBRA_ROUTE_RIP;
242 break;
243 case RTPROT_RIPNG:
244 proto = ZEBRA_ROUTE_RIPNG;
245 break;
246 case RTPROT_NHRP:
247 proto = ZEBRA_ROUTE_NHRP;
248 break;
249 case RTPROT_EIGRP:
250 proto = ZEBRA_ROUTE_EIGRP;
251 break;
252 case RTPROT_LDP:
253 proto = ZEBRA_ROUTE_LDP;
254 break;
255 case RTPROT_STATIC:
d4d71f11 256 case RTPROT_ZSTATIC:
915902cb
DS
257 proto = ZEBRA_ROUTE_STATIC;
258 break;
0761368a
DS
259 case RTPROT_SHARP:
260 proto = ZEBRA_ROUTE_SHARP;
261 break;
262 case RTPROT_PBR:
263 proto = ZEBRA_ROUTE_PBR;
264 break;
da82f6b4
CF
265 case RTPROT_OPENFABRIC:
266 proto = ZEBRA_ROUTE_OPENFABRIC;
267 break;
38e40db1
SW
268 case RTPROT_ZEBRA:
269 if (is_nexthop) {
270 proto = ZEBRA_ROUTE_NHG;
271 break;
272 }
273 /* Intentional fall thru */
915902cb 274 default:
0761368a
DS
275 /*
276 * When a user adds a new protocol this will show up
277 * to let them know to do something about it. This
278 * is intentionally a warn because we should see
279 * this as part of development of a new protocol
280 */
9df414fe
QY
281 zlog_debug(
282 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
15569c58 283 __func__, proto);
915902cb
DS
284 proto = ZEBRA_ROUTE_KERNEL;
285 break;
286 }
287 return proto;
288}
289
12f6fb97
DS
290/*
291Pending: create an efficient table_id (in a tree/hash) based lookup)
292 */
d7c0a89a 293static vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
12f6fb97 294{
d62a17ae 295 struct vrf *vrf;
296 struct zebra_vrf *zvrf;
12f6fb97 297
a2addae8 298 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
78dd30b2
PG
299 zvrf = vrf->info;
300 if (zvrf == NULL)
d62a17ae 301 continue;
78dd30b2
PG
302 /* case vrf with netns : match the netnsid */
303 if (vrf_is_backend_netns()) {
304 if (ns_id == zvrf_id(zvrf))
305 return zvrf_id(zvrf);
306 } else {
307 /* VRF is VRF_BACKEND_VRF_LITE */
308 if (zvrf->table_id != table_id)
309 continue;
310 return zvrf_id(zvrf);
311 }
d62a17ae 312 }
12f6fb97 313
d62a17ae 314 return VRF_DEFAULT;
12f6fb97
DS
315}
316
87da6a60
SW
317/**
318 * @parse_encap_mpls() - Parses encapsulated mpls attributes
319 * @tb: Pointer to rtattr to look for nested items in.
320 * @labels: Pointer to store labels in.
321 *
322 * Return: Number of mpls labels found.
323 */
324static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
325{
326 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
327 mpls_lse_t *lses = NULL;
328 int num_labels = 0;
329 uint32_t ttl = 0;
330 uint32_t bos = 0;
331 uint32_t exp = 0;
332 mpls_label_t label = 0;
333
334 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
335 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
336 while (!bos && num_labels < MPLS_MAX_LABELS) {
337 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
338 labels[num_labels++] = label;
339 }
340
341 return num_labels;
342}
343
77a44d94
SW
344static struct nexthop
345parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
346 enum blackhole_type bh_type, int index, void *prefsrc,
20822f9d 347 void *gate, afi_t afi, vrf_id_t vrf_id)
77a44d94
SW
348{
349 struct interface *ifp = NULL;
350 struct nexthop nh = {0};
351 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
352 int num_labels = 0;
353
20822f9d 354 vrf_id_t nh_vrf_id = vrf_id;
77a44d94
SW
355 size_t sz = (afi == AFI_IP) ? 4 : 16;
356
357 if (bh_type == BLACKHOLE_UNSPEC) {
358 if (index && !gate)
359 nh.type = NEXTHOP_TYPE_IFINDEX;
360 else if (index && gate)
361 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
362 : NEXTHOP_TYPE_IPV6_IFINDEX;
363 else if (!index && gate)
364 nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
365 : NEXTHOP_TYPE_IPV6;
366 else {
367 nh.type = NEXTHOP_TYPE_BLACKHOLE;
368 nh.bh_type = bh_type;
369 }
370 } else {
371 nh.type = NEXTHOP_TYPE_BLACKHOLE;
372 nh.bh_type = bh_type;
373 }
374 nh.ifindex = index;
375 if (prefsrc)
376 memcpy(&nh.src, prefsrc, sz);
377 if (gate)
378 memcpy(&nh.gate, gate, sz);
379
380 if (index) {
381 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
382 if (ifp)
383 nh_vrf_id = ifp->vrf_id;
384 }
385 nh.vrf_id = nh_vrf_id;
386
387 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
388 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
389 == LWTUNNEL_ENCAP_MPLS) {
390 num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
391 }
392
393 if (rtm->rtm_flags & RTNH_F_ONLINK)
394 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
395
396 if (num_labels)
397 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
398
399 return nh;
400}
401
20822f9d 402static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
0eb97b86 403 struct nexthop_group *ng,
20822f9d
SW
404 struct rtmsg *rtm,
405 struct rtnexthop *rtnh,
406 struct rtattr **tb,
407 void *prefsrc, vrf_id_t vrf_id)
408{
409 void *gate = NULL;
410 struct interface *ifp = NULL;
411 int index = 0;
412 /* MPLS labels */
413 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
414 int num_labels = 0;
415 struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
416
417 int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
418 vrf_id_t nh_vrf_id = vrf_id;
419
20822f9d
SW
420 for (;;) {
421 struct nexthop *nh = NULL;
422
423 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
424 break;
425
426 index = rtnh->rtnh_ifindex;
427 if (index) {
428 /*
429 * Yes we are looking this up
430 * for every nexthop and just
431 * using the last one looked
432 * up right now
433 */
434 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
435 index);
436 if (ifp)
437 nh_vrf_id = ifp->vrf_id;
438 else {
439 flog_warn(
440 EC_ZEBRA_UNKNOWN_INTERFACE,
441 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
15569c58 442 __func__, index);
20822f9d
SW
443 nh_vrf_id = VRF_DEFAULT;
444 }
445 } else
446 nh_vrf_id = vrf_id;
447
448 if (rtnh->rtnh_len > sizeof(*rtnh)) {
449 memset(rtnh_tb, 0, sizeof(rtnh_tb));
450
451 netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
452 rtnh->rtnh_len - sizeof(*rtnh));
453 if (rtnh_tb[RTA_GATEWAY])
454 gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
455 if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
456 && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
457 == LWTUNNEL_ENCAP_MPLS) {
458 num_labels = parse_encap_mpls(
459 rtnh_tb[RTA_ENCAP], labels);
460 }
461 }
462
f3354e16
SW
463 if (gate && rtm->rtm_family == AF_INET) {
464 if (index)
0eb97b86
MS
465 nh = nexthop_from_ipv4_ifindex(
466 gate, prefsrc, index, nh_vrf_id);
f3354e16 467 else
0eb97b86
MS
468 nh = nexthop_from_ipv4(gate, prefsrc,
469 nh_vrf_id);
f3354e16
SW
470 } else if (gate && rtm->rtm_family == AF_INET6) {
471 if (index)
0eb97b86
MS
472 nh = nexthop_from_ipv6_ifindex(
473 gate, index, nh_vrf_id);
f3354e16 474 else
0eb97b86 475 nh = nexthop_from_ipv6(gate, nh_vrf_id);
20822f9d 476 } else
0eb97b86 477 nh = nexthop_from_ifindex(index, nh_vrf_id);
20822f9d
SW
478
479 if (nh) {
df7fb580
DS
480 nh->weight = rtnh->rtnh_hops + 1;
481
20822f9d
SW
482 if (num_labels)
483 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
484 num_labels, labels);
485
486 if (rtnh->rtnh_flags & RTNH_F_ONLINK)
487 SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);
0eb97b86
MS
488
489 /* Add to temporary list */
490 nexthop_group_add_sorted(ng, nh);
20822f9d
SW
491 }
492
493 if (rtnh->rtnh_len == 0)
494 break;
495
496 len -= NLMSG_ALIGN(rtnh->rtnh_len);
497 rtnh = RTNH_NEXT(rtnh);
498 }
499
0eb97b86 500 uint8_t nhop_num = nexthop_group_nexthop_num(ng);
20822f9d
SW
501
502 return nhop_num;
503}
504
718e3744 505/* Looking up routing table by netlink interface. */
2414abd3 506static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
d62a17ae 507 int startup)
718e3744 508{
d62a17ae 509 int len;
510 struct rtmsg *rtm;
511 struct rtattr *tb[RTA_MAX + 1];
d7c0a89a 512 uint8_t flags = 0;
d62a17ae 513 struct prefix p;
792fa92e 514 struct prefix_ipv6 src_p = {};
78dd30b2 515 vrf_id_t vrf_id;
6a6d11a3 516 bool selfroute;
d62a17ae 517
518 char anyaddr[16] = {0};
519
915902cb 520 int proto = ZEBRA_ROUTE_KERNEL;
d62a17ae 521 int index = 0;
522 int table;
523 int metric = 0;
d7c0a89a 524 uint32_t mtu = 0;
25715c7e 525 uint8_t distance = 0;
4e40b6d6 526 route_tag_t tag = 0;
fcc89a9c 527 uint32_t nhe_id = 0;
d62a17ae 528
529 void *dest = NULL;
530 void *gate = NULL;
531 void *prefsrc = NULL; /* IPv4 preferred source host address */
532 void *src = NULL; /* IPv6 srcdest source prefix */
e655a03c 533 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
d62a17ae 534
535 rtm = NLMSG_DATA(h);
536
537 if (startup && h->nlmsg_type != RTM_NEWROUTE)
538 return 0;
e655a03c
DL
539 switch (rtm->rtm_type) {
540 case RTN_UNICAST:
541 break;
542 case RTN_BLACKHOLE:
543 bh_type = BLACKHOLE_NULL;
544 break;
545 case RTN_UNREACHABLE:
546 bh_type = BLACKHOLE_REJECT;
547 break;
548 case RTN_PROHIBIT:
549 bh_type = BLACKHOLE_ADMINPROHIB;
550 break;
551 default:
8c8f250b
DS
552 if (IS_ZEBRA_DEBUG_KERNEL)
553 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
554 nl_rttype_to_str(rtm->rtm_type),
555 rtm->rtm_type);
d62a17ae 556 return 0;
e655a03c 557 }
d62a17ae 558
559 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618 560 if (len < 0) {
15569c58
DA
561 zlog_err(
562 "%s: Message received from netlink is of a broken size %d %zu",
563 __func__, h->nlmsg_len,
564 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 565 return -1;
9bdf8618 566 }
d62a17ae 567
0d6f7fd6 568 memset(tb, 0, sizeof(tb));
d62a17ae 569 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
570
571 if (rtm->rtm_flags & RTM_F_CLONED)
572 return 0;
573 if (rtm->rtm_protocol == RTPROT_REDIRECT)
574 return 0;
575 if (rtm->rtm_protocol == RTPROT_KERNEL)
576 return 0;
577
6a6d11a3
NN
578 selfroute = is_selfroute(rtm->rtm_protocol);
579
580 if (!startup && selfroute && h->nlmsg_type == RTM_NEWROUTE) {
6ab5222f
DS
581 if (IS_ZEBRA_DEBUG_KERNEL)
582 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
583 rtm->rtm_protocol);
d62a17ae 584 return 0;
6ab5222f 585 }
d62a17ae 586
587 /* We don't care about change notifications for the MPLS table. */
588 /* TODO: Revisit this. */
589 if (rtm->rtm_family == AF_MPLS)
590 return 0;
591
592 /* Table corresponding to route. */
593 if (tb[RTA_TABLE])
594 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
595 else
596 table = rtm->rtm_table;
597
598 /* Map to VRF */
78dd30b2 599 vrf_id = vrf_lookup_by_table(table, ns_id);
d62a17ae 600 if (vrf_id == VRF_DEFAULT) {
601 if (!is_zebra_valid_kernel_table(table)
602 && !is_zebra_main_routing_table(table))
603 return 0;
604 }
605
606 /* Route which inserted by Zebra. */
6a6d11a3 607 if (selfroute) {
d62a17ae 608 flags |= ZEBRA_FLAG_SELFROUTE;
38e40db1 609 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
915902cb 610 }
d62a17ae 611 if (tb[RTA_OIF])
612 index = *(int *)RTA_DATA(tb[RTA_OIF]);
613
614 if (tb[RTA_DST])
615 dest = RTA_DATA(tb[RTA_DST]);
616 else
617 dest = anyaddr;
618
619 if (tb[RTA_SRC])
620 src = RTA_DATA(tb[RTA_SRC]);
621 else
622 src = anyaddr;
623
624 if (tb[RTA_PREFSRC])
625 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
626
627 if (tb[RTA_GATEWAY])
628 gate = RTA_DATA(tb[RTA_GATEWAY]);
629
fcc89a9c
SW
630 if (tb[RTA_NH_ID])
631 nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
632
f19435a8
DS
633 if (tb[RTA_PRIORITY])
634 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
d62a17ae 635
4e40b6d6
KK
636#if defined(SUPPORT_REALMS)
637 if (tb[RTA_FLOW])
638 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
639#endif
640
f19435a8
DS
641 if (tb[RTA_METRICS]) {
642 struct rtattr *mxrta[RTAX_MAX + 1];
d62a17ae 643
0d6f7fd6 644 memset(mxrta, 0, sizeof(mxrta));
996c9314 645 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
f19435a8 646 RTA_PAYLOAD(tb[RTA_METRICS]));
d62a17ae 647
f19435a8 648 if (mxrta[RTAX_MTU])
d7c0a89a 649 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
d62a17ae 650 }
651
652 if (rtm->rtm_family == AF_INET) {
653 p.family = AF_INET;
930571d2 654 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
e17d9b2d 655 zlog_err(
75829703 656 "Invalid destination prefix length: %u received from kernel route change",
930571d2 657 rtm->rtm_dst_len);
e17d9b2d 658 return -1;
930571d2 659 }
d62a17ae 660 memcpy(&p.u.prefix4, dest, 4);
661 p.prefixlen = rtm->rtm_dst_len;
662
1f610a1f
CF
663 if (rtm->rtm_src_len != 0) {
664 char buf[PREFIX_STRLEN];
9df414fe 665 flog_warn(
e914ccbe 666 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
9df414fe
QY
667 "unsupported IPv4 sourcedest route (dest %s vrf %u)",
668 prefix2str(&p, buf, sizeof(buf)), vrf_id);
1f610a1f
CF
669 return 0;
670 }
930571d2 671
1f610a1f
CF
672 /* Force debug below to not display anything for source */
673 src_p.prefixlen = 0;
d62a17ae 674 } else if (rtm->rtm_family == AF_INET6) {
675 p.family = AF_INET6;
930571d2 676 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
e17d9b2d 677 zlog_err(
75829703 678 "Invalid destination prefix length: %u received from kernel route change",
930571d2 679 rtm->rtm_dst_len);
e17d9b2d 680 return -1;
930571d2 681 }
d62a17ae 682 memcpy(&p.u.prefix6, dest, 16);
683 p.prefixlen = rtm->rtm_dst_len;
684
685 src_p.family = AF_INET6;
930571d2 686 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
e17d9b2d 687 zlog_err(
75829703 688 "Invalid source prefix length: %u received from kernel route change",
930571d2 689 rtm->rtm_src_len);
e17d9b2d 690 return -1;
930571d2 691 }
d62a17ae 692 memcpy(&src_p.prefix, src, 16);
693 src_p.prefixlen = rtm->rtm_src_len;
694 }
695
25715c7e
DS
696 /*
697 * For ZEBRA_ROUTE_KERNEL types:
698 *
699 * The metric/priority of the route received from the kernel
700 * is a 32 bit number. We are going to interpret the high
701 * order byte as the Admin Distance and the low order 3 bytes
702 * as the metric.
703 *
704 * This will allow us to do two things:
705 * 1) Allow the creation of kernel routes that can be
706 * overridden by zebra.
707 * 2) Allow the old behavior for 'most' kernel route types
708 * if a user enters 'ip route ...' v4 routes get a metric
709 * of 0 and v6 routes get a metric of 1024. Both of these
710 * values will end up with a admin distance of 0, which
711 * will cause them to win for the purposes of zebra.
712 */
713 if (proto == ZEBRA_ROUTE_KERNEL) {
714 distance = (metric >> 24) & 0xFF;
996c9314 715 metric = (metric & 0x00FFFFFF);
25715c7e
DS
716 }
717
d62a17ae 718 if (IS_ZEBRA_DEBUG_KERNEL) {
719 char buf[PREFIX_STRLEN];
720 char buf2[PREFIX_STRLEN];
45df4e96 721 zlog_debug("%s %s%s%s vrf %u(%u) metric: %d Admin Distance: %d",
996c9314
LB
722 nl_msg_type_to_str(h->nlmsg_type),
723 prefix2str(&p, buf, sizeof(buf)),
724 src_p.prefixlen ? " from " : "",
725 src_p.prefixlen
726 ? prefix2str(&src_p, buf2, sizeof(buf2))
727 : "",
45df4e96 728 vrf_id, table, metric, distance);
d62a17ae 729 }
730
731 afi_t afi = AFI_IP;
732 if (rtm->rtm_family == AF_INET6)
733 afi = AFI_IP6;
734
735 if (h->nlmsg_type == RTM_NEWROUTE) {
8795f904 736
fd36be7e 737 if (!tb[RTA_MULTIPATH]) {
77a44d94 738 struct nexthop nh = {0};
8795f904 739
77a44d94
SW
740 if (!nhe_id) {
741 nh = parse_nexthop_unicast(
742 ns_id, rtm, tb, bh_type, index, prefsrc,
20822f9d 743 gate, afi, vrf_id);
87da6a60 744 }
4a7371e9 745 rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
8032b717
SW
746 &src_p, &nh, nhe_id, table, metric, mtu,
747 distance, tag);
fd36be7e 748 } else {
d62a17ae 749 /* This is a multipath route */
d62a17ae 750 struct route_entry *re;
0eb97b86 751 struct nexthop_group *ng = NULL;
d62a17ae 752 struct rtnexthop *rtnh =
753 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
d62a17ae 754
755 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
915902cb 756 re->type = proto;
25715c7e 757 re->distance = distance;
d62a17ae 758 re->flags = flags;
759 re->metric = metric;
760 re->mtu = mtu;
761 re->vrf_id = vrf_id;
762 re->table = table;
98572489 763 re->uptime = monotime(NULL);
4e40b6d6 764 re->tag = tag;
bbb322f2 765 re->nhe_id = nhe_id;
3c04071d 766
20822f9d 767 if (!nhe_id) {
0eb97b86
MS
768 uint8_t nhop_num;
769
770 /* Use temporary list of nexthops; parse
771 * message payload's nexthops.
772 */
773 ng = nexthop_group_new();
774 nhop_num =
20822f9d 775 parse_multipath_nexthops_unicast(
0eb97b86 776 ns_id, ng, rtm, rtnh, tb,
20822f9d
SW
777 prefsrc, vrf_id);
778
779 zserv_nexthop_num_warn(
780 __func__, (const struct prefix *)&p,
781 nhop_num);
0eb97b86
MS
782
783 if (nhop_num == 0) {
784 nexthop_group_delete(&ng);
785 ng = NULL;
786 }
d62a17ae 787 }
788
0eb97b86 789 if (nhe_id || ng)
1f610a1f 790 rib_add_multipath(afi, SAFI_UNICAST, &p,
0eb97b86 791 &src_p, re, ng);
20822f9d
SW
792 else
793 XFREE(MTYPE_RE, re);
d62a17ae 794 }
795 } else {
bc541126
SW
796 if (nhe_id) {
797 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
798 &p, &src_p, NULL, nhe_id, table, metric,
799 distance, true);
800 } else {
801 if (!tb[RTA_MULTIPATH]) {
802 struct nexthop nh;
760f39dc
HS
803
804 nh = parse_nexthop_unicast(
805 ns_id, rtm, tb, bh_type, index, prefsrc,
806 gate, afi, vrf_id);
bc541126
SW
807 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
808 flags, &p, &src_p, &nh, 0, table,
809 metric, distance, true);
8ba5bd58 810 } else {
bc541126
SW
811 /* XXX: need to compare the entire list of
812 * nexthops here for NLM_F_APPEND stupidity */
813 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
814 flags, &p, &src_p, NULL, 0, table,
815 metric, distance, true);
8ba5bd58 816 }
d62a17ae 817 }
818 }
819
820 return 0;
718e3744 821}
822
e3be0432
DS
823static struct mcast_route_data *mroute = NULL;
824
2414abd3 825static int netlink_route_change_read_multicast(struct nlmsghdr *h,
d62a17ae 826 ns_id_t ns_id, int startup)
565fdc75 827{
d62a17ae 828 int len;
829 struct rtmsg *rtm;
830 struct rtattr *tb[RTA_MAX + 1];
831 struct mcast_route_data *m;
832 struct mcast_route_data mr;
833 int iif = 0;
834 int count;
835 int oif[256];
836 int oif_count = 0;
837 char sbuf[40];
838 char gbuf[40];
839 char oif_list[256] = "\0";
78dd30b2 840 vrf_id_t vrf;
43b5cc5e 841 int table;
d62a17ae 842
843 if (mroute)
844 m = mroute;
845 else {
846 memset(&mr, 0, sizeof(mr));
847 m = &mr;
848 }
849
850 rtm = NLMSG_DATA(h);
851
852 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
853
0d6f7fd6 854 memset(tb, 0, sizeof(tb));
d62a17ae 855 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
90d82769 856
43b5cc5e
DS
857 if (tb[RTA_TABLE])
858 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
859 else
860 table = rtm->rtm_table;
861
78dd30b2 862 vrf = vrf_lookup_by_table(table, ns_id);
43b5cc5e 863
d62a17ae 864 if (tb[RTA_IIF])
865 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
866
867 if (tb[RTA_SRC])
bd8b9272 868 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
d62a17ae 869
870 if (tb[RTA_DST])
bd8b9272 871 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
d62a17ae 872
62819462 873 if (tb[RTA_EXPIRES])
d62a17ae 874 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
875
876 if (tb[RTA_MULTIPATH]) {
877 struct rtnexthop *rtnh =
878 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
879
880 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
881 for (;;) {
882 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
883 break;
884
885 oif[oif_count] = rtnh->rtnh_ifindex;
886 oif_count++;
887
3c04071d
SW
888 if (rtnh->rtnh_len == 0)
889 break;
890
d62a17ae 891 len -= NLMSG_ALIGN(rtnh->rtnh_len);
892 rtnh = RTNH_NEXT(rtnh);
893 }
894 }
895
896 if (IS_ZEBRA_DEBUG_KERNEL) {
822c9af2
SW
897 struct interface *ifp = NULL;
898 struct zebra_vrf *zvrf = NULL;
899
0af35d90
RW
900 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
901 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
d62a17ae 902 for (count = 0; count < oif_count; count++) {
903 ifp = if_lookup_by_index(oif[count], vrf);
904 char temp[256];
905
5b4256ca
DS
906 sprintf(temp, "%s(%d) ", ifp ? ifp->name : "Unknown",
907 oif[count]);
eab4a5c2 908 strlcat(oif_list, temp, sizeof(oif_list));
d62a17ae 909 }
822c9af2 910 zvrf = zebra_vrf_lookup_by_id(vrf);
d62a17ae 911 ifp = if_lookup_by_index(iif, vrf);
822c9af2
SW
912 zlog_debug(
913 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s(%d) OIF: %s jiffies: %lld",
914 (zvrf ? zvrf->vrf->name : "Unknown"), vrf,
915 nl_msg_type_to_str(h->nlmsg_type), sbuf, gbuf,
916 ifp ? ifp->name : "Unknown", iif, oif_list,
917 m->lastused);
90d82769 918 }
d62a17ae 919 return 0;
565fdc75
DS
920}
921
2414abd3 922int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
565fdc75 923{
d62a17ae 924 int len;
d62a17ae 925 struct rtmsg *rtm;
926
927 rtm = NLMSG_DATA(h);
928
929 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
930 /* If this is not route add/delete message print warning. */
9165c5f5 931 zlog_debug("Kernel message: %s NS %u",
87b5d1b0 932 nl_msg_type_to_str(h->nlmsg_type), ns_id);
d62a17ae 933 return 0;
934 }
935
c25e2f1a
DS
936 if (!(rtm->rtm_family == AF_INET ||
937 rtm->rtm_family == AF_INET6 ||
938 rtm->rtm_family == RTNL_FAMILY_IPMR )) {
9df414fe 939 flog_warn(
e914ccbe 940 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
941 "Invalid address family: %u received from kernel route change: %s",
942 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
943 return 0;
944 }
945
d62a17ae 946 /* Connected route. */
947 if (IS_ZEBRA_DEBUG_KERNEL)
78dd30b2 948 zlog_debug("%s %s %s proto %s NS %u",
d62a17ae 949 nl_msg_type_to_str(h->nlmsg_type),
950 nl_family_to_str(rtm->rtm_family),
951 nl_rttype_to_str(rtm->rtm_type),
78dd30b2 952 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
d62a17ae 953
d62a17ae 954
955 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618 956 if (len < 0) {
15569c58
DA
957 zlog_err(
958 "%s: Message received from netlink is of a broken size: %d %zu",
959 __func__, h->nlmsg_len,
960 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 961 return -1;
9bdf8618 962 }
d62a17ae 963
e655a03c 964 if (rtm->rtm_type == RTN_MULTICAST)
2414abd3 965 netlink_route_change_read_multicast(h, ns_id, startup);
e655a03c 966 else
2414abd3 967 netlink_route_change_read_unicast(h, ns_id, startup);
d62a17ae 968 return 0;
565fdc75
DS
969}
970
289602d7 971/* Request for specific route information from the kernel */
d62a17ae 972static int netlink_request_route(struct zebra_ns *zns, int family, int type)
289602d7 973{
d62a17ae 974 struct {
975 struct nlmsghdr n;
976 struct rtmsg rtm;
977 } req;
978
979 /* Form the request, specifying filter (rtattr) if needed. */
980 memset(&req, 0, sizeof(req));
981 req.n.nlmsg_type = type;
718f9b0f 982 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 983 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
984 req.rtm.rtm_family = family;
985
986 return netlink_request(&zns->netlink_cmd, &req.n);
289602d7 987}
988
718e3744 989/* Routing table read function using netlink interface. Only called
990 bootstrap time. */
d62a17ae 991int netlink_route_read(struct zebra_ns *zns)
718e3744 992{
d62a17ae 993 int ret;
85a75f1e
MS
994 struct zebra_dplane_info dp_info;
995
996 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 997
998 /* Get IPv4 routing table. */
999 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
1000 if (ret < 0)
1001 return ret;
1002 ret = netlink_parse_info(netlink_route_change_read_unicast,
85a75f1e 1003 &zns->netlink_cmd, &dp_info, 0, 1);
d62a17ae 1004 if (ret < 0)
1005 return ret;
1006
1007 /* Get IPv6 routing table. */
1008 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
1009 if (ret < 0)
1010 return ret;
1011 ret = netlink_parse_info(netlink_route_change_read_unicast,
85a75f1e 1012 &zns->netlink_cmd, &dp_info, 0, 1);
d62a17ae 1013 if (ret < 0)
1014 return ret;
1015
1016 return 0;
718e3744 1017}
1018
d7c0a89a
QY
1019static void _netlink_route_nl_add_gateway_info(uint8_t route_family,
1020 uint8_t gw_family,
d62a17ae 1021 struct nlmsghdr *nlmsg,
1022 size_t req_size, int bytelen,
81793ac1 1023 const struct nexthop *nexthop)
40c7bdb0 1024{
d62a17ae 1025 if (route_family == AF_MPLS) {
1026 struct gw_family_t gw_fam;
1027
1028 gw_fam.family = gw_family;
1029 if (gw_family == AF_INET)
1030 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1031 else
1032 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1033 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
1034 bytelen + 2);
1035 } else {
1036 if (gw_family == AF_INET)
1037 addattr_l(nlmsg, req_size, RTA_GATEWAY,
1038 &nexthop->gate.ipv4, bytelen);
1039 else
1040 addattr_l(nlmsg, req_size, RTA_GATEWAY,
1041 &nexthop->gate.ipv6, bytelen);
1042 }
40c7bdb0 1043}
1044
d7c0a89a
QY
1045static void _netlink_route_rta_add_gateway_info(uint8_t route_family,
1046 uint8_t gw_family,
d62a17ae 1047 struct rtattr *rta,
1048 struct rtnexthop *rtnh,
1049 size_t req_size, int bytelen,
81793ac1 1050 const struct nexthop *nexthop)
40c7bdb0 1051{
d62a17ae 1052 if (route_family == AF_MPLS) {
1053 struct gw_family_t gw_fam;
1054
1055 gw_fam.family = gw_family;
1056 if (gw_family == AF_INET)
1057 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1058 else
1059 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1060 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
1061 bytelen + 2);
1062 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
1063 } else {
1064 if (gw_family == AF_INET)
1065 rta_addattr_l(rta, req_size, RTA_GATEWAY,
1066 &nexthop->gate.ipv4, bytelen);
1067 else
1068 rta_addattr_l(rta, req_size, RTA_GATEWAY,
1069 &nexthop->gate.ipv6, bytelen);
1070 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1071 }
40c7bdb0 1072}
1073
b7537db6
SW
1074static int build_label_stack(struct mpls_label_stack *nh_label,
1075 mpls_lse_t *out_lse, char *label_buf,
1076 size_t label_buf_size)
1077{
1078 char label_buf1[20];
1079 int num_labels = 0;
1080
1081 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1082 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1083 continue;
1084
1085 if (IS_ZEBRA_DEBUG_KERNEL) {
1086 if (!num_labels)
1087 sprintf(label_buf, "label %u",
1088 nh_label->label[i]);
1089 else {
1090 sprintf(label_buf1, "/%u", nh_label->label[i]);
1091 strlcat(label_buf, label_buf1, label_buf_size);
1092 }
1093 }
1094
1095 out_lse[num_labels] =
1096 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1097 num_labels++;
1098 }
1099
1100 return num_labels;
1101}
1102
fa713d9e
CF
1103/* This function takes a nexthop as argument and adds
1104 * the appropriate netlink attributes to an existing
1105 * netlink message.
1106 *
1107 * @param routedesc: Human readable description of route type
1108 * (direct/recursive, single-/multipath)
1109 * @param bytelen: Length of addresses in bytes.
1110 * @param nexthop: Nexthop information
1111 * @param nlmsg: nlmsghdr structure to fill in.
1112 * @param req_size: The size allocated for the message.
1113 */
9a0132a5
DS
1114static void _netlink_route_build_singlepath(const struct prefix *p,
1115 const char *routedesc, int bytelen,
81793ac1 1116 const struct nexthop *nexthop,
d62a17ae 1117 struct nlmsghdr *nlmsg,
1118 struct rtmsg *rtmsg,
1119 size_t req_size, int cmd)
fa713d9e 1120{
b7537db6 1121
d62a17ae 1122 mpls_lse_t out_lse[MPLS_MAX_LABELS];
9a62e84b 1123 char label_buf[256];
b7537db6
SW
1124 int num_labels = 0;
1125
1126 assert(nexthop);
d62a17ae 1127
1128 /*
1129 * label_buf is *only* currently used within debugging.
1130 * As such when we assign it we are guarding it inside
1131 * a debug test. If you want to change this make sure
1132 * you fix this assumption
1133 */
1134 label_buf[0] = '\0';
d62a17ae 1135
b7537db6
SW
1136 num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf,
1137 sizeof(label_buf));
fa712963
RW
1138
1139 if (num_labels) {
1140 /* Set the BoS bit */
1141 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1142
1143 if (rtmsg->rtm_family == AF_MPLS)
1144 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
1145 num_labels * sizeof(mpls_lse_t));
1146 else {
1147 struct rtattr *nest;
d7c0a89a 1148 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
fa712963
RW
1149
1150 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE, &encap,
d7c0a89a 1151 sizeof(uint16_t));
fa712963
RW
1152 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
1153 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST, &out_lse,
1154 num_labels * sizeof(mpls_lse_t));
1155 addattr_nest_end(nlmsg, nest);
66d42727 1156 }
0aabccc0 1157 }
fa713d9e 1158
d62a17ae 1159 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1160 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1161
1162 if (rtmsg->rtm_family == AF_INET
1163 && (nexthop->type == NEXTHOP_TYPE_IPV6
1164 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1165 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1166 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
1167 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1168
975a328e
DA
1169 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY
1170 && (cmd == RTM_NEWROUTE))
d62a17ae 1171 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1172 &nexthop->rmap_src.ipv4, bytelen);
975a328e
DA
1173 else if (nexthop->src.ipv4.s_addr != INADDR_ANY
1174 && (cmd == RTM_NEWROUTE))
d62a17ae 1175 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1176 &nexthop->src.ipv4, bytelen);
1177
1178 if (IS_ZEBRA_DEBUG_KERNEL)
1179 zlog_debug(
9a0132a5
DS
1180 " 5549: _netlink_route_build_singlepath() (%s): %pFX nexthop via %s %s if %u(%u)",
1181 routedesc, p, ipv4_ll_buf, label_buf,
7556c3fd 1182 nexthop->ifindex, nexthop->vrf_id);
d62a17ae 1183 return;
0aabccc0
DD
1184 }
1185
d62a17ae 1186 if (nexthop->type == NEXTHOP_TYPE_IPV4
1187 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1188 /* Send deletes to the kernel without specifying the next-hop */
1189 if (cmd != RTM_DELROUTE)
1190 _netlink_route_nl_add_gateway_info(
1191 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1192 bytelen, nexthop);
1193
1194 if (cmd == RTM_NEWROUTE) {
975a328e 1195 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1196 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1197 &nexthop->rmap_src.ipv4, bytelen);
975a328e 1198 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1199 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1200 &nexthop->src.ipv4, bytelen);
1201 }
1202
1203 if (IS_ZEBRA_DEBUG_KERNEL)
1204 zlog_debug(
9a0132a5
DS
1205 "netlink_route_multipath() (%s): %pFX nexthop via %s %s if %u(%u)",
1206 routedesc, p, inet_ntoa(nexthop->gate.ipv4),
7556c3fd 1207 label_buf, nexthop->ifindex, nexthop->vrf_id);
0aabccc0 1208 }
fa713d9e 1209
d62a17ae 1210 if (nexthop->type == NEXTHOP_TYPE_IPV6
1211 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1212 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1213 nlmsg, req_size, bytelen,
1214 nexthop);
1215
1216 if (cmd == RTM_NEWROUTE) {
1217 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1218 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1219 &nexthop->rmap_src.ipv6, bytelen);
1220 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1221 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1222 &nexthop->src.ipv6, bytelen);
1223 }
fa713d9e 1224
d62a17ae 1225 if (IS_ZEBRA_DEBUG_KERNEL)
1226 zlog_debug(
9a0132a5
DS
1227 "netlink_route_multipath() (%s): %pFX nexthop via %s %s if %u(%u)",
1228 routedesc, p, inet6_ntoa(nexthop->gate.ipv6),
7556c3fd 1229 label_buf, nexthop->ifindex, nexthop->vrf_id);
d62a17ae 1230 }
5e210522
DS
1231
1232 /*
1233 * We have the ifindex so we should always send it
1234 * This is especially useful if we are doing route
1235 * leaking.
1236 */
1237 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
d62a17ae 1238 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1239
275565fb 1240 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
d62a17ae 1241 if (cmd == RTM_NEWROUTE) {
975a328e 1242 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1243 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1244 &nexthop->rmap_src.ipv4, bytelen);
975a328e 1245 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1246 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1247 &nexthop->src.ipv4, bytelen);
1248 }
fa713d9e 1249
d62a17ae 1250 if (IS_ZEBRA_DEBUG_KERNEL)
1251 zlog_debug(
9a0132a5
DS
1252 "netlink_route_multipath() (%s): %pFX nexthop via if %u(%u)",
1253 routedesc, p, nexthop->ifindex,
1254 nexthop->vrf_id);
0aabccc0 1255 }
fa713d9e
CF
1256}
1257
1258/* This function takes a nexthop as argument and
1259 * appends to the given rtattr/rtnexthop pair the
1260 * representation of the nexthop. If the nexthop
1261 * defines a preferred source, the src parameter
1262 * will be modified to point to that src, otherwise
1263 * it will be kept unmodified.
1264 *
1265 * @param routedesc: Human readable description of route type
1266 * (direct/recursive, single-/multipath)
1267 * @param bytelen: Length of addresses in bytes.
1268 * @param nexthop: Nexthop information
1269 * @param rta: rtnetlink attribute structure
1270 * @param rtnh: pointer to an rtnetlink nexthop structure
1271 * @param src: pointer pointing to a location where
1272 * the prefsrc should be stored.
1273 */
9a0132a5
DS
1274static void
1275_netlink_route_build_multipath(const struct prefix *p, const char *routedesc,
1276 int bytelen, const struct nexthop *nexthop,
1277 struct rtattr *rta, struct rtnexthop *rtnh,
1278 struct rtmsg *rtmsg, const union g_addr **src)
fa713d9e 1279{
d62a17ae 1280 mpls_lse_t out_lse[MPLS_MAX_LABELS];
9a62e84b 1281 char label_buf[256];
b7537db6 1282 int num_labels = 0;
d62a17ae 1283
1284 rtnh->rtnh_len = sizeof(*rtnh);
1285 rtnh->rtnh_flags = 0;
1286 rtnh->rtnh_hops = 0;
1287 rta->rta_len += rtnh->rtnh_len;
1288
b7537db6
SW
1289 assert(nexthop);
1290
d62a17ae 1291 /*
1292 * label_buf is *only* currently used within debugging.
1293 * As such when we assign it we are guarding it inside
1294 * a debug test. If you want to change this make sure
1295 * you fix this assumption
1296 */
1297 label_buf[0] = '\0';
d62a17ae 1298
b7537db6
SW
1299 num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf,
1300 sizeof(label_buf));
fa712963
RW
1301
1302 if (num_labels) {
1303 /* Set the BoS bit */
1304 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1305
1306 if (rtmsg->rtm_family == AF_MPLS) {
1307 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1308 &out_lse,
1309 num_labels * sizeof(mpls_lse_t));
1310 rtnh->rtnh_len +=
1311 RTA_LENGTH(num_labels * sizeof(mpls_lse_t));
1312 } else {
1313 struct rtattr *nest;
d7c0a89a 1314 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
fa712963
RW
1315 int len = rta->rta_len;
1316
1317 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_ENCAP_TYPE,
d7c0a89a 1318 &encap, sizeof(uint16_t));
fa712963
RW
1319 nest = rta_nest(rta, NL_PKT_BUF_SIZE, RTA_ENCAP);
1320 rta_addattr_l(rta, NL_PKT_BUF_SIZE, MPLS_IPTUNNEL_DST,
1321 &out_lse,
1322 num_labels * sizeof(mpls_lse_t));
1323 rta_nest_end(rta, nest);
1324 rtnh->rtnh_len += rta->rta_len - len;
66d42727 1325 }
d62a17ae 1326 }
1327
1328 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1329 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1330
1331 if (rtmsg->rtm_family == AF_INET
1332 && (nexthop->type == NEXTHOP_TYPE_IPV6
1333 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1334 bytelen = 4;
1335 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1336 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1337 bytelen);
1338 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1339 rtnh->rtnh_ifindex = nexthop->ifindex;
8d27e1aa 1340 if (nexthop->weight)
1341 rtnh->rtnh_hops = nexthop->weight - 1;
d62a17ae 1342
975a328e 1343 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1344 *src = &nexthop->rmap_src;
975a328e 1345 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1346 *src = &nexthop->src;
1347
1348 if (IS_ZEBRA_DEBUG_KERNEL)
1349 zlog_debug(
9a0132a5
DS
1350 " 5549: netlink_route_build_multipath() (%s): %pFX nexthop via %s %s if %u",
1351 routedesc, p, ipv4_ll_buf, label_buf,
d62a17ae 1352 nexthop->ifindex);
1353 return;
1354 }
1355
1356 if (nexthop->type == NEXTHOP_TYPE_IPV4
1357 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1358 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1359 rta, rtnh, NL_PKT_BUF_SIZE,
1360 bytelen, nexthop);
975a328e 1361 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1362 *src = &nexthop->rmap_src;
975a328e 1363 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1364 *src = &nexthop->src;
1365
1366 if (IS_ZEBRA_DEBUG_KERNEL)
1367 zlog_debug(
9a0132a5
DS
1368 "netlink_route_multipath() (%s): %pFX nexthop via %s %s if %u",
1369 routedesc, p, inet_ntoa(nexthop->gate.ipv4),
d62a17ae 1370 label_buf, nexthop->ifindex);
1371 }
1372 if (nexthop->type == NEXTHOP_TYPE_IPV6
1373 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1374 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1375 rta, rtnh, NL_PKT_BUF_SIZE,
1376 bytelen, nexthop);
1377
1378 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1379 *src = &nexthop->rmap_src;
1380 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1381 *src = &nexthop->src;
1382
1383 if (IS_ZEBRA_DEBUG_KERNEL)
1384 zlog_debug(
9a0132a5
DS
1385 "netlink_route_multipath() (%s): %pFX nexthop via %s %s if %u",
1386 routedesc, p, inet6_ntoa(nexthop->gate.ipv6),
d62a17ae 1387 label_buf, nexthop->ifindex);
1388 }
5e210522
DS
1389
1390 /*
1391 * We have figured out the ifindex so we should always send it
1392 * This is especially useful if we are doing route
1393 * leaking.
1394 */
1395 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1396 rtnh->rtnh_ifindex = nexthop->ifindex;
1397
d62a17ae 1398 /* ifindex */
275565fb 1399 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
975a328e 1400 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1401 *src = &nexthop->rmap_src;
975a328e 1402 else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
d62a17ae 1403 *src = &nexthop->src;
1404
1405 if (IS_ZEBRA_DEBUG_KERNEL)
1406 zlog_debug(
9a0132a5
DS
1407 "netlink_route_multipath() (%s): %pFX nexthop via if %u",
1408 routedesc, p, nexthop->ifindex);
d62a17ae 1409 }
df7fb580
DS
1410
1411 if (nexthop->weight)
1412 rtnh->rtnh_hops = nexthop->weight - 1;
fa713d9e
CF
1413}
1414
9a0132a5
DS
1415static inline void _netlink_mpls_build_singlepath(const struct prefix *p,
1416 const char *routedesc,
81793ac1 1417 const zebra_nhlfe_t *nhlfe,
d62a17ae 1418 struct nlmsghdr *nlmsg,
1419 struct rtmsg *rtmsg,
1420 size_t req_size, int cmd)
40c7bdb0 1421{
d62a17ae 1422 int bytelen;
d7c0a89a 1423 uint8_t family;
40c7bdb0 1424
d62a17ae 1425 family = NHLFE_FAMILY(nhlfe);
1426 bytelen = (family == AF_INET ? 4 : 16);
9a0132a5 1427 _netlink_route_build_singlepath(p, routedesc, bytelen, nhlfe->nexthop,
d62a17ae 1428 nlmsg, rtmsg, req_size, cmd);
40c7bdb0 1429}
1430
1431
1432static inline void
9a0132a5
DS
1433_netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc,
1434 const zebra_nhlfe_t *nhlfe, struct rtattr *rta,
1435 struct rtnexthop *rtnh, struct rtmsg *rtmsg,
1436 const union g_addr **src)
40c7bdb0 1437{
d62a17ae 1438 int bytelen;
d7c0a89a 1439 uint8_t family;
40c7bdb0 1440
d62a17ae 1441 family = NHLFE_FAMILY(nhlfe);
1442 bytelen = (family == AF_INET ? 4 : 16);
9a0132a5
DS
1443 _netlink_route_build_multipath(p, routedesc, bytelen, nhlfe->nexthop,
1444 rta, rtnh, rtmsg, src);
40c7bdb0 1445}
1446
1447
fa713d9e
CF
1448/* Log debug information for netlink_route_multipath
1449 * if debug logging is enabled.
1450 *
1451 * @param cmd: Netlink command which is to be processed
1452 * @param p: Prefix for which the change is due
fa713d9e 1453 * @param family: Address family which the change concerns
45df4e96
DS
1454 * @param zvrf: The vrf we are in
1455 * @param tableid: The table we are working on
fa713d9e 1456 */
86391e56
MS
1457static void _netlink_route_debug(int cmd, const struct prefix *p,
1458 int family, vrf_id_t vrfid,
7556c3fd 1459 uint32_t tableid)
fa713d9e 1460{
d62a17ae 1461 if (IS_ZEBRA_DEBUG_KERNEL) {
1462 char buf[PREFIX_STRLEN];
1463 zlog_debug(
45df4e96
DS
1464 "netlink_route_multipath(): %s %s vrf %u(%u)",
1465 nl_msg_type_to_str(cmd),
1466 prefix2str(p, buf, sizeof(buf)),
86391e56 1467 vrfid, tableid);
d62a17ae 1468 }
1469}
1470
e8968ccb
SW
1471static void _netlink_nexthop_debug(int cmd, uint32_t id)
1472{
1473 if (IS_ZEBRA_DEBUG_KERNEL)
1474 zlog_debug("netlink_nexthop(): %s, id=%u",
1475 nl_msg_type_to_str(cmd), id);
1476}
1477
d7c0a89a 1478static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
40c7bdb0 1479{
d62a17ae 1480 if (IS_ZEBRA_DEBUG_KERNEL)
1481 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1482 nl_msg_type_to_str(cmd), label);
fa713d9e
CF
1483}
1484
d62a17ae 1485static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
5895d33f 1486 int llalen, ns_id_t ns_id)
5c610faf 1487{
f3dbec60 1488 uint8_t protocol = RTPROT_ZEBRA;
d62a17ae 1489 struct {
1490 struct nlmsghdr n;
1491 struct ndmsg ndm;
1492 char buf[256];
1493 } req;
5c610faf 1494
5895d33f 1495 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
8f7d9fc0 1496
5605ecfc 1497 memset(&req, 0, sizeof(req));
5c610faf 1498
d62a17ae 1499 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1500 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1501 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1502 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
a55ba23f 1503
d62a17ae 1504 req.ndm.ndm_family = AF_INET;
1505 req.ndm.ndm_state = NUD_PERMANENT;
1506 req.ndm.ndm_ifindex = ifindex;
1507 req.ndm.ndm_type = RTN_UNICAST;
5c610faf 1508
f3dbec60
DS
1509 addattr_l(&req.n, sizeof(req),
1510 NDA_PROTOCOL, &protocol, sizeof(protocol));
d62a17ae 1511 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1512 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
5c610faf 1513
d62a17ae 1514 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1515 0);
5c610faf
DS
1516}
1517
762288f5
SW
1518static bool nexthop_set_src(const struct nexthop *nexthop, int family,
1519 union g_addr *src)
1520{
1521 if (family == AF_INET) {
1522 if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1523 src->ipv4 = nexthop->rmap_src.ipv4;
1524 return true;
1525 } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1526 src->ipv4 = nexthop->src.ipv4;
1527 return true;
1528 }
1529 } else if (family == AF_INET6) {
1530 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1531 src->ipv6 = nexthop->rmap_src.ipv6;
1532 return true;
1533 } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1534 src->ipv6 = nexthop->src.ipv6;
1535 return true;
1536 }
1537 }
1538
1539 return false;
1540}
1541
7cdb1a84
MS
1542/*
1543 * Routing table change via netlink interface, using a dataplane context object
1544 */
25779064 1545static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx)
7cdb1a84
MS
1546{
1547 int bytelen;
7cdb1a84
MS
1548 struct nexthop *nexthop = NULL;
1549 unsigned int nexthop_num;
1550 int family;
1551 const char *routedesc;
762288f5 1552 bool setsrc = false;
7cdb1a84
MS
1553 union g_addr src;
1554 const struct prefix *p, *src_p;
1555 uint32_t table_id;
1556
1557 struct {
1558 struct nlmsghdr n;
1559 struct rtmsg r;
1560 char buf[NL_PKT_BUF_SIZE];
1561 } req;
1562
1563 p = dplane_ctx_get_dest(ctx);
1564 src_p = dplane_ctx_get_src(ctx);
1565
1566 family = PREFIX_FAMILY(p);
1567
5709131c 1568 memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE);
7cdb1a84
MS
1569
1570 bytelen = (family == AF_INET ? 4 : 16);
1571
1572 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1573 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1574
334734a8
DS
1575 if ((cmd == RTM_NEWROUTE) &&
1576 ((p->family == AF_INET) || v6_rr_semantics))
1577 req.n.nlmsg_flags |= NLM_F_REPLACE;
7cdb1a84
MS
1578
1579 req.n.nlmsg_type = cmd;
1580
1581 req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
1582
1583 req.r.rtm_family = family;
1584 req.r.rtm_dst_len = p->prefixlen;
1585 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1586 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1587
5709131c 1588 if (cmd == RTM_DELROUTE)
7cdb1a84 1589 req.r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
5709131c 1590 else
7cdb1a84 1591 req.r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
7cdb1a84
MS
1592
1593 /*
1594 * blackhole routes are not RTN_UNICAST, they are
1595 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
1596 * so setting this value as a RTN_UNICAST would
1597 * cause the route lookup of just the prefix
1598 * to fail. So no need to specify this for
1599 * the RTM_DELROUTE case
1600 */
1601 if (cmd != RTM_DELROUTE)
1602 req.r.rtm_type = RTN_UNICAST;
1603
5709131c 1604 addattr_l(&req.n, sizeof(req), RTA_DST, &p->u.prefix, bytelen);
7cdb1a84 1605 if (src_p)
5709131c 1606 addattr_l(&req.n, sizeof(req), RTA_SRC, &src_p->u.prefix,
7cdb1a84
MS
1607 bytelen);
1608
1609 /* Metric. */
1610 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1611 * used
1612 * either by the kernel or by zebra. Its purely for calculating best
1613 * path(s)
1614 * by the routing protocol and for communicating with protocol peers.
1615 */
5709131c 1616 addattr32(&req.n, sizeof(req), RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
7cdb1a84
MS
1617
1618#if defined(SUPPORT_REALMS)
1619 {
1620 route_tag_t tag;
1621
5709131c 1622 if (cmd == RTM_DELROUTE)
7cdb1a84 1623 tag = dplane_ctx_get_old_tag(ctx);
5709131c 1624 else
7cdb1a84 1625 tag = dplane_ctx_get_tag(ctx);
7cdb1a84
MS
1626
1627 if (tag > 0 && tag <= 255)
5709131c 1628 addattr32(&req.n, sizeof(req), RTA_FLOW, tag);
7cdb1a84
MS
1629 }
1630#endif
1631 /* Table corresponding to this route. */
1632 table_id = dplane_ctx_get_table(ctx);
1633 if (table_id < 256)
1634 req.r.rtm_table = table_id;
1635 else {
1636 req.r.rtm_table = RT_TABLE_UNSPEC;
5709131c 1637 addattr32(&req.n, sizeof(req), RTA_TABLE, table_id);
7cdb1a84
MS
1638 }
1639
1640 _netlink_route_debug(cmd, p, family, dplane_ctx_get_vrf(ctx), table_id);
1641
1642 /*
1643 * If we are not updating the route and we have received
1644 * a route delete, then all we need to fill in is the
1645 * prefix information to tell the kernel to schwack
1646 * it.
1647 */
1648 if (cmd == RTM_DELROUTE)
1649 goto skip;
1650
1651 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
1652 char buf[NL_PKT_BUF_SIZE];
1653 struct rtattr *rta = (void *)buf;
1654 uint32_t mtu = dplane_ctx_get_mtu(ctx);
1655 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
5709131c 1656
7cdb1a84
MS
1657 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
1658 mtu = nexthop_mtu;
1659 rta->rta_type = RTA_METRICS;
1660 rta->rta_len = RTA_LENGTH(0);
5709131c
MS
1661 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1662 RTAX_MTU, &mtu, sizeof(mtu));
7cdb1a84
MS
1663 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1664 RTA_PAYLOAD(rta));
1665 }
1666
7c99d51b 1667 if (kernel_nexthops_supported()) {
9a0132a5
DS
1668 if (IS_ZEBRA_DEBUG_KERNEL)
1669 zlog_debug(
1670 "netlink_route_multipath(): %pFX nhg_id is %u",
1671 p, dplane_ctx_get_nhe_id(ctx));
de3f5488
SW
1672 /* Kernel supports nexthop objects */
1673 addattr32(&req.n, sizeof(req), RTA_NH_ID,
1674 dplane_ctx_get_nhe_id(ctx));
1675 goto skip;
1676 }
1677
7cdb1a84 1678 /* Count overall nexthops so we can decide whether to use singlepath
5709131c
MS
1679 * or multipath case.
1680 */
7cdb1a84
MS
1681 nexthop_num = 0;
1682 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1683 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1684 continue;
1685 if (cmd == RTM_NEWROUTE && !NEXTHOP_IS_ACTIVE(nexthop->flags))
1686 continue;
1687
1688 nexthop_num++;
1689 }
1690
1691 /* Singlepath case. */
220f0f42 1692 if (nexthop_num == 1) {
7cdb1a84
MS
1693 nexthop_num = 0;
1694 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1695 /*
1696 * So we want to cover 2 types of blackhole
1697 * routes here:
1698 * 1) A normal blackhole route( ala from a static
1699 * install.
1700 * 2) A recursively resolved blackhole route
1701 */
1702 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1703 switch (nexthop->bh_type) {
1704 case BLACKHOLE_ADMINPROHIB:
1705 req.r.rtm_type = RTN_PROHIBIT;
1706 break;
1707 case BLACKHOLE_REJECT:
1708 req.r.rtm_type = RTN_UNREACHABLE;
1709 break;
1710 default:
1711 req.r.rtm_type = RTN_BLACKHOLE;
1712 break;
1713 }
1714 goto skip;
1715 }
1716 if (CHECK_FLAG(nexthop->flags,
1717 NEXTHOP_FLAG_RECURSIVE)) {
5709131c
MS
1718
1719 if (setsrc)
1720 continue;
1721
762288f5
SW
1722 setsrc = nexthop_set_src(nexthop, family, &src);
1723
f183e380 1724 continue;
7cdb1a84
MS
1725 }
1726
1727 if ((cmd == RTM_NEWROUTE
1728 && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
1729 routedesc = nexthop->rparent
1730 ? "recursive, single-path"
1731 : "single-path";
1732
1733 _netlink_route_build_singlepath(
9a0132a5 1734 p, routedesc, bytelen, nexthop, &req.n,
5709131c 1735 &req.r, sizeof(req), cmd);
7cdb1a84
MS
1736 nexthop_num++;
1737 break;
1738 }
1739 }
1740 if (setsrc && (cmd == RTM_NEWROUTE)) {
1741 if (family == AF_INET)
5709131c 1742 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1743 &src.ipv4, bytelen);
1744 else if (family == AF_INET6)
5709131c 1745 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1746 &src.ipv6, bytelen);
1747 }
1748 } else { /* Multipath case */
1749 char buf[NL_PKT_BUF_SIZE];
1750 struct rtattr *rta = (void *)buf;
1751 struct rtnexthop *rtnh;
81793ac1 1752 const union g_addr *src1 = NULL;
7cdb1a84
MS
1753
1754 rta->rta_type = RTA_MULTIPATH;
1755 rta->rta_len = RTA_LENGTH(0);
1756 rtnh = RTA_DATA(rta);
1757
1758 nexthop_num = 0;
1759 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
7cdb1a84
MS
1760 if (CHECK_FLAG(nexthop->flags,
1761 NEXTHOP_FLAG_RECURSIVE)) {
1762 /* This only works for IPv4 now */
5709131c
MS
1763 if (setsrc)
1764 continue;
1765
762288f5 1766 setsrc = nexthop_set_src(nexthop, family, &src);
78e54ded
MS
1767
1768 continue;
7cdb1a84
MS
1769 }
1770
1771 if ((cmd == RTM_NEWROUTE
1772 && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
1773 routedesc = nexthop->rparent
1774 ? "recursive, multipath"
1775 : "multipath";
1776 nexthop_num++;
1777
1778 _netlink_route_build_multipath(
9a0132a5
DS
1779 p, routedesc, bytelen, nexthop, rta,
1780 rtnh, &req.r, &src1);
7cdb1a84
MS
1781 rtnh = RTNH_NEXT(rtnh);
1782
1783 if (!setsrc && src1) {
1784 if (family == AF_INET)
1785 src.ipv4 = src1->ipv4;
1786 else if (family == AF_INET6)
1787 src.ipv6 = src1->ipv6;
1788
1789 setsrc = 1;
1790 }
1791 }
1792 }
1793 if (setsrc && (cmd == RTM_NEWROUTE)) {
1794 if (family == AF_INET)
5709131c 1795 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1796 &src.ipv4, bytelen);
1797 else if (family == AF_INET6)
5709131c 1798 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1799 &src.ipv6, bytelen);
1800 if (IS_ZEBRA_DEBUG_KERNEL)
1801 zlog_debug("Setting source");
1802 }
1803
1804 if (rta->rta_len > RTA_LENGTH(0))
1805 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1806 RTA_DATA(rta), RTA_PAYLOAD(rta));
1807 }
1808
1809 /* If there is no useful nexthop then return. */
1810 if (nexthop_num == 0) {
1811 if (IS_ZEBRA_DEBUG_KERNEL)
1812 zlog_debug(
1813 "netlink_route_multipath(): No useful nexthop.");
1814 return 0;
1815 }
1816
1817skip:
7cdb1a84
MS
1818 /* Talk to netlink socket. */
1819 return netlink_talk_info(netlink_talk_filter, &req.n,
1820 dplane_ctx_get_ns(ctx), 0);
1821}
1822
43b5cc5e 1823int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
e3be0432 1824{
5523c156 1825 uint32_t actual_table;
d62a17ae 1826 int suc = 0;
1827 struct mcast_route_data *mr = (struct mcast_route_data *)in;
bd8b9272
DS
1828 struct {
1829 struct nlmsghdr n;
1830 struct ndmsg ndm;
1831 char buf[256];
1832 } req;
e3be0432 1833
d62a17ae 1834 mroute = mr;
5895d33f 1835 struct zebra_ns *zns;
bd8b9272 1836
009f8ad5 1837 zns = zvrf->zns;
5605ecfc 1838 memset(&req, 0, sizeof(req));
bd8b9272
DS
1839
1840 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1841 req.n.nlmsg_flags = NLM_F_REQUEST;
1842 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1843
1844 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1845 req.n.nlmsg_type = RTM_GETROUTE;
1846
1847 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1848 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1849 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1850 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
5523c156
DS
1851 /*
1852 * What?
1853 *
1854 * So during the namespace cleanup we started storing
1855 * the zvrf table_id for the default table as RT_TABLE_MAIN
1856 * which is what the normal routing table for ip routing is.
1857 * This change caused this to break our lookups of sg data
1858 * because prior to this change the zvrf->table_id was 0
1859 * and when the pim multicast kernel code saw a 0,
1860 * it was auto-translated to RT_TABLE_DEFAULT. But since
1861 * we are now passing in RT_TABLE_MAIN there is no auto-translation
1862 * and the kernel goes screw you and the delicious cookies you
1863 * are trying to give me. So now we have this little hack.
1864 */
1865 actual_table = (zvrf->table_id == RT_TABLE_MAIN) ? RT_TABLE_DEFAULT :
1866 zvrf->table_id;
1867 addattr_l(&req.n, sizeof(req), RTA_TABLE, &actual_table, 4);
e3be0432 1868
bd8b9272
DS
1869 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1870 &zns->netlink_cmd, zns, 0);
e3be0432 1871
bd8b9272 1872 mroute = NULL;
d62a17ae 1873 return suc;
e3be0432
DS
1874}
1875
8d03bc50
SW
1876/* Char length to debug ID with */
1877#define ID_LENGTH 10
1878
565ce0d3 1879static void _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
8d03bc50 1880 uint32_t id,
e22e8001 1881 const struct nh_grp *z_grp,
0c8215cb 1882 const uint8_t count)
565ce0d3 1883{
565ce0d3 1884 struct nexthop_grp grp[count];
8d03bc50
SW
1885 /* Need space for max group size, "/", and null term */
1886 char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
1887 char buf1[ID_LENGTH + 2];
1888
1889 buf[0] = '\0';
565ce0d3
SW
1890
1891 memset(grp, 0, sizeof(grp));
1892
1893 if (count) {
0c8215cb 1894 for (int i = 0; i < count; i++) {
e22e8001 1895 grp[i].id = z_grp[i].id;
df7fb580 1896 grp[i].weight = z_grp[i].weight - 1;
8d03bc50
SW
1897
1898 if (IS_ZEBRA_DEBUG_KERNEL) {
1899 if (i == 0)
1900 snprintf(buf, sizeof(buf1), "group %u",
1901 grp[i].id);
1902 else {
1903 snprintf(buf1, sizeof(buf1), "/%u",
1904 grp[i].id);
1905 strlcat(buf, buf1, sizeof(buf));
1906 }
1907 }
565ce0d3 1908 }
0c8215cb 1909 addattr_l(n, req_size, NHA_GROUP, grp, count * sizeof(*grp));
565ce0d3 1910 }
8d03bc50
SW
1911
1912 if (IS_ZEBRA_DEBUG_KERNEL)
1913 zlog_debug("%s: ID (%u): %s", __func__, id, buf);
565ce0d3
SW
1914}
1915
f820d025
SW
1916/**
1917 * netlink_nexthop() - Nexthop change via the netlink interface
1918 *
1919 * @ctx: Dataplane ctx
1920 *
1921 * Return: Result status
1922 */
1923static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx)
1924{
f820d025
SW
1925 struct {
1926 struct nlmsghdr n;
1927 struct nhmsg nhm;
1928 char buf[NL_PKT_BUF_SIZE];
1929 } req;
1930
8d03bc50
SW
1931 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1932 char label_buf[256];
1933 int num_labels = 0;
1934 size_t req_size = sizeof(req);
1935
81505946 1936 /* Nothing to do if the kernel doesn't support nexthop objects */
7c99d51b 1937 if (!kernel_nexthops_supported())
81505946
SW
1938 return 0;
1939
8d03bc50
SW
1940 label_buf[0] = '\0';
1941
1942 memset(&req, 0, req_size);
f820d025 1943
f820d025
SW
1944 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
1945 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
9a1588c4
SW
1946
1947 if (cmd == RTM_NEWNEXTHOP)
1948 req.n.nlmsg_flags |= NLM_F_REPLACE;
1949
f820d025 1950 req.n.nlmsg_type = cmd;
4f096395 1951 req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
f820d025
SW
1952
1953 req.nhm.nh_family = AF_UNSPEC;
fec211ad 1954 /* TODO: Scope? */
f820d025 1955
0c8215cb
SW
1956 uint32_t id = dplane_ctx_get_nhe_id(ctx);
1957
1958 if (!id) {
f820d025
SW
1959 flog_err(
1960 EC_ZEBRA_NHG_FIB_UPDATE,
1961 "Failed trying to update a nexthop group in the kernel that does not have an ID");
1962 return -1;
1963 }
1964
8d03bc50 1965 addattr32(&req.n, req_size, NHA_ID, id);
f820d025
SW
1966
1967 if (cmd == RTM_NEWNEXTHOP) {
e22e8001 1968 if (dplane_ctx_get_nhe_nh_grp_count(ctx))
0c8215cb 1969 _netlink_nexthop_build_group(
8d03bc50 1970 &req.n, req_size, id,
e22e8001
SW
1971 dplane_ctx_get_nhe_nh_grp(ctx),
1972 dplane_ctx_get_nhe_nh_grp_count(ctx));
0c8215cb
SW
1973 else {
1974 const struct nexthop *nh =
1975 dplane_ctx_get_nhe_ng(ctx)->nexthop;
1976 afi_t afi = dplane_ctx_get_nhe_afi(ctx);
e8b0e420 1977
0c8215cb 1978 if (afi == AFI_IP)
565ce0d3 1979 req.nhm.nh_family = AF_INET;
0c8215cb 1980 else if (afi == AFI_IP6)
565ce0d3 1981 req.nhm.nh_family = AF_INET6;
f820d025 1982
565ce0d3 1983 switch (nh->type) {
a6e6a6d8 1984 case NEXTHOP_TYPE_IPV4:
565ce0d3 1985 case NEXTHOP_TYPE_IPV4_IFINDEX:
8d03bc50 1986 addattr_l(&req.n, req_size, NHA_GATEWAY,
565ce0d3
SW
1987 &nh->gate.ipv4, IPV4_MAX_BYTELEN);
1988 break;
a6e6a6d8 1989 case NEXTHOP_TYPE_IPV6:
565ce0d3 1990 case NEXTHOP_TYPE_IPV6_IFINDEX:
8d03bc50 1991 addattr_l(&req.n, req_size, NHA_GATEWAY,
565ce0d3
SW
1992 &nh->gate.ipv6, IPV6_MAX_BYTELEN);
1993 break;
1994 case NEXTHOP_TYPE_BLACKHOLE:
8d03bc50
SW
1995 addattr_l(&req.n, req_size, NHA_BLACKHOLE, NULL,
1996 0);
1997 /* Blackhole shouldn't have anymore attributes
1998 */
1999 goto nexthop_done;
565ce0d3
SW
2000 case NEXTHOP_TYPE_IFINDEX:
2001 /* Don't need anymore info for this */
2002 break;
a6e6a6d8
SW
2003 }
2004
2005 if (!nh->ifindex) {
565ce0d3
SW
2006 flog_err(
2007 EC_ZEBRA_NHG_FIB_UPDATE,
2008 "Context received for kernel nexthop update without an interface");
2009 return -1;
565ce0d3
SW
2010 }
2011
8d03bc50
SW
2012 addattr32(&req.n, req_size, NHA_OIF, nh->ifindex);
2013
62d2ecb2
SW
2014 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK))
2015 req.nhm.nh_flags |= RTNH_F_ONLINK;
2016
8d03bc50
SW
2017 num_labels =
2018 build_label_stack(nh->nh_label, out_lse,
2019 label_buf, sizeof(label_buf));
2020
2021 if (num_labels) {
2022 /* Set the BoS bit */
2023 out_lse[num_labels - 1] |=
2024 htonl(1 << MPLS_LS_S_SHIFT);
2025
2026 /*
2027 * TODO: MPLS unsupported for now in kernel.
2028 */
2029 if (req.nhm.nh_family == AF_MPLS)
2030 goto nexthop_done;
2031#if 0
2032 addattr_l(&req.n, req_size, NHA_NEWDST,
2033 &out_lse,
2034 num_labels
2035 * sizeof(mpls_lse_t));
2036#endif
2037 else {
2038 struct rtattr *nest;
2039 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
2040
2041 addattr_l(&req.n, req_size,
2042 NHA_ENCAP_TYPE, &encap,
2043 sizeof(uint16_t));
2044 nest = addattr_nest(&req.n, req_size,
2045 NHA_ENCAP);
2046 addattr_l(&req.n, req_size,
2047 MPLS_IPTUNNEL_DST, &out_lse,
2048 num_labels
2049 * sizeof(mpls_lse_t));
2050 addattr_nest_end(&req.n, nest);
2051 }
2052 }
2053
2054 nexthop_done:
2055 if (IS_ZEBRA_DEBUG_KERNEL) {
2056 char buf[NEXTHOP_STRLEN];
2057
2058 snprintfrr(buf, sizeof(buf), "%pNHv", nh);
2059 zlog_debug("%s: ID (%u): %s (%u) %s ", __func__,
2060 id, buf, nh->vrf_id, label_buf);
2061 }
f820d025
SW
2062 }
2063
38e40db1 2064 req.nhm.nh_protocol = zebra2proto(dplane_ctx_get_nhe_type(ctx));
f820d025 2065
f820d025
SW
2066 } else if (cmd != RTM_DELNEXTHOP) {
2067 flog_err(
2068 EC_ZEBRA_NHG_FIB_UPDATE,
2069 "Nexthop group kernel update command (%d) does not exist",
2070 cmd);
2071 return -1;
2072 }
2073
0c8215cb 2074 _netlink_nexthop_debug(cmd, id);
f820d025
SW
2075
2076 return netlink_talk_info(netlink_talk_filter, &req.n,
2077 dplane_ctx_get_ns(ctx), 0);
2078}
2079
2080/**
2081 * kernel_nexthop_update() - Update/delete a nexthop from the kernel
2082 *
2083 * @ctx: Dataplane context
2084 *
2085 * Return: Dataplane result flag
2086 */
2087enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx)
2088{
98cda54a
SW
2089 int cmd = 0;
2090 int ret = 0;
f820d025
SW
2091
2092 switch (dplane_ctx_get_op(ctx)) {
2093 case DPLANE_OP_NH_DELETE:
2094 cmd = RTM_DELNEXTHOP;
2095 break;
2096 case DPLANE_OP_NH_INSTALL:
2097 case DPLANE_OP_NH_UPDATE:
2098 cmd = RTM_NEWNEXTHOP;
2099 break;
2100 case DPLANE_OP_ROUTE_INSTALL:
2101 case DPLANE_OP_ROUTE_UPDATE:
2102 case DPLANE_OP_ROUTE_DELETE:
2103 case DPLANE_OP_ROUTE_NOTIFY:
2104 case DPLANE_OP_LSP_INSTALL:
2105 case DPLANE_OP_LSP_UPDATE:
2106 case DPLANE_OP_LSP_DELETE:
2107 case DPLANE_OP_LSP_NOTIFY:
2108 case DPLANE_OP_PW_INSTALL:
2109 case DPLANE_OP_PW_UNINSTALL:
2110 case DPLANE_OP_SYS_ROUTE_ADD:
2111 case DPLANE_OP_SYS_ROUTE_DELETE:
2112 case DPLANE_OP_ADDR_INSTALL:
2113 case DPLANE_OP_ADDR_UNINSTALL:
2114 case DPLANE_OP_MAC_INSTALL:
2115 case DPLANE_OP_MAC_DELETE:
40a2a6cd
SW
2116 case DPLANE_OP_NEIGH_INSTALL:
2117 case DPLANE_OP_NEIGH_UPDATE:
2118 case DPLANE_OP_NEIGH_DELETE:
2119 case DPLANE_OP_VTEP_ADD:
2120 case DPLANE_OP_VTEP_DELETE:
f820d025
SW
2121 case DPLANE_OP_NONE:
2122 flog_err(
2123 EC_ZEBRA_NHG_FIB_UPDATE,
2124 "Context received for kernel nexthop update with incorrect OP code (%u)",
2125 dplane_ctx_get_op(ctx));
2126 return ZEBRA_DPLANE_REQUEST_FAILURE;
f820d025
SW
2127 }
2128
2129 ret = netlink_nexthop(cmd, ctx);
2130
2131 return (ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS
2132 : ZEBRA_DPLANE_REQUEST_FAILURE);
2133}
2134
7cdb1a84
MS
2135/*
2136 * Update or delete a prefix from the kernel,
2137 * using info from a dataplane context.
2138 */
25779064 2139enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
7cdb1a84
MS
2140{
2141 int cmd, ret;
2142 const struct prefix *p = dplane_ctx_get_dest(ctx);
f183e380 2143 struct nexthop *nexthop;
7cdb1a84
MS
2144
2145 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
2146 cmd = RTM_DELROUTE;
2147 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
2148 cmd = RTM_NEWROUTE;
2149 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
2150
2151 if (p->family == AF_INET || v6_rr_semantics) {
2152 /* Single 'replace' operation */
2153 cmd = RTM_NEWROUTE;
fe5f21af
DS
2154
2155 /*
2156 * With route replace semantics in place
2157 * for v4 routes and the new route is a system
2158 * route we do not install anything.
2159 * The problem here is that the new system
2160 * route should cause us to withdraw from
2161 * the kernel the old non-system route
2162 */
2163 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)) &&
2164 !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2165 (void)netlink_route_multipath(RTM_DELROUTE,
2166 ctx);
7cdb1a84
MS
2167 } else {
2168 /*
2169 * So v6 route replace semantics are not in
2170 * the kernel at this point as I understand it.
2171 * so let's do a delete then an add.
2172 * In the future once v6 route replace semantics
2173 * are in we can figure out what to do here to
2174 * allow working with old and new kernels.
2175 *
2176 * I'm also intentionally ignoring the failure case
2177 * of the route delete. If that happens yeah we're
2178 * screwed.
2179 */
3cdba47a
DS
2180 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
2181 (void)netlink_route_multipath(RTM_DELROUTE,
2182 ctx);
7cdb1a84
MS
2183 cmd = RTM_NEWROUTE;
2184 }
2185
2186 } else {
2187 return ZEBRA_DPLANE_REQUEST_FAILURE;
2188 }
2189
3cdba47a
DS
2190 if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
2191 ret = netlink_route_multipath(cmd, ctx);
2192 else
2193 ret = 0;
f183e380
MS
2194 if ((cmd == RTM_NEWROUTE) && (ret == 0)) {
2195 /* Update installed nexthops to signal which have been
2196 * installed.
2197 */
2198 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2199 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2200 continue;
2201
2202 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
2203 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
f183e380
MS
2204 }
2205 }
2206 }
7cdb1a84
MS
2207
2208 return (ret == 0 ?
2209 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
2210}
2211
d9f5b2f5
SW
2212/**
2213 * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop
2214 *
2215 * @tb: Netlink RTA data
2216 * @family: Address family in the nhmsg
8c0a24c1 2217 * @ifp: Interface connected - this should be NULL, we fill it in
d9f5b2f5
SW
2218 * @ns_id: Namspace id
2219 *
2220 * Return: New nexthop
2221 */
e22e8001
SW
2222static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
2223 unsigned char family,
2224 struct interface **ifp,
2225 ns_id_t ns_id)
d9f5b2f5 2226{
e22e8001 2227 struct nexthop nh = {};
d9f5b2f5 2228 void *gate = NULL;
8e401b25 2229 enum nexthop_types_t type = 0;
e22e8001
SW
2230 int if_index = 0;
2231 size_t sz = 0;
7134ba70 2232 struct interface *ifp_lookup;
d9f5b2f5
SW
2233
2234 if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
2235
8e401b25 2236
d9f5b2f5
SW
2237 if (tb[NHA_GATEWAY]) {
2238 switch (family) {
2239 case AF_INET:
8e401b25 2240 type = NEXTHOP_TYPE_IPV4_IFINDEX;
d9f5b2f5
SW
2241 sz = 4;
2242 break;
2243 case AF_INET6:
8e401b25 2244 type = NEXTHOP_TYPE_IPV6_IFINDEX;
d9f5b2f5
SW
2245 sz = 16;
2246 break;
2247 default:
2248 flog_warn(
2249 EC_ZEBRA_BAD_NHG_MESSAGE,
c4239c05 2250 "Nexthop gateway with bad address family (%d) received from kernel",
d9f5b2f5 2251 family);
e22e8001 2252 return nh;
d9f5b2f5
SW
2253 }
2254 gate = RTA_DATA(tb[NHA_GATEWAY]);
e22e8001 2255 } else
8e401b25 2256 type = NEXTHOP_TYPE_IFINDEX;
d9f5b2f5 2257
8e401b25 2258 if (type)
e22e8001 2259 nh.type = type;
8e401b25
SW
2260
2261 if (gate)
e22e8001 2262 memcpy(&(nh.gate), gate, sz);
8e401b25
SW
2263
2264 if (if_index)
e22e8001 2265 nh.ifindex = if_index;
8e401b25 2266
7134ba70
DS
2267 ifp_lookup =
2268 if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
2269
e22e8001 2270 if (ifp)
7134ba70
DS
2271 *ifp = ifp_lookup;
2272 if (ifp_lookup)
2273 nh.vrf_id = ifp_lookup->vrf_id;
e22e8001 2274 else {
d9f5b2f5
SW
2275 flog_warn(
2276 EC_ZEBRA_UNKNOWN_INTERFACE,
2277 "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
15569c58 2278 __func__, nh.ifindex);
d9f5b2f5 2279
e22e8001 2280 nh.vrf_id = VRF_DEFAULT;
d9f5b2f5
SW
2281 }
2282
2283 if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
2284 uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
2285 int num_labels = 0;
6e728764 2286
d9f5b2f5
SW
2287 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
2288
e22e8001 2289 if (encap_type == LWTUNNEL_ENCAP_MPLS)
d9f5b2f5 2290 num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
d9f5b2f5 2291
e22e8001
SW
2292 if (num_labels)
2293 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
d9f5b2f5 2294 labels);
d9f5b2f5
SW
2295 }
2296
2297 return nh;
2298}
2299
85f5e761 2300static int netlink_nexthop_process_group(struct rtattr **tb,
5a935f79 2301 struct nh_grp *z_grp, int z_grp_size)
d9f5b2f5 2302{
e22e8001
SW
2303 uint8_t count = 0;
2304 /* linux/nexthop.h group struct */
d9f5b2f5
SW
2305 struct nexthop_grp *n_grp = NULL;
2306
85f5e761 2307 n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
d9f5b2f5
SW
2308 count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
2309
2310 if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
2311 flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
2312 "Invalid nexthop group received from the kernel");
85f5e761 2313 return count;
d9f5b2f5
SW
2314 }
2315
38e40db1 2316#if 0
d9f5b2f5 2317 // TODO: Need type for something?
85f5e761
SW
2318 zlog_debug("Nexthop group type: %d",
2319 *((uint16_t *)RTA_DATA(tb[NHA_GROUP_TYPE])));
d9f5b2f5 2320
38e40db1 2321#endif
d9f5b2f5 2322
5a935f79 2323 for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
e22e8001 2324 z_grp[i].id = n_grp[i].id;
df7fb580 2325 z_grp[i].weight = n_grp[i].weight + 1;
85f5e761 2326 }
d9f5b2f5
SW
2327 return count;
2328}
2329
2330/**
2331 * netlink_nexthop_change() - Read in change about nexthops from the kernel
2332 *
2333 * @h: Netlink message header
2334 * @ns_id: Namspace id
2335 * @startup: Are we reading under startup conditions?
2336 *
2337 * Return: Result status
2338 */
2339int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2340{
2341 int len;
2342 /* nexthop group id */
2343 uint32_t id;
2344 unsigned char family;
38e40db1 2345 int type;
e8b0e420 2346 afi_t afi = AFI_UNSPEC;
946de1b9 2347 vrf_id_t vrf_id = VRF_DEFAULT;
8c0a24c1 2348 struct interface *ifp = NULL;
d9f5b2f5 2349 struct nhmsg *nhm = NULL;
e22e8001
SW
2350 struct nexthop nh = {};
2351 struct nh_grp grp[MULTIPATH_NUM] = {};
85f5e761 2352 /* Count of nexthops in group array */
e22e8001 2353 uint8_t grp_count = 0;
e22e8001 2354 struct rtattr *tb[NHA_MAX + 1] = {};
d9f5b2f5 2355
d9f5b2f5
SW
2356 nhm = NLMSG_DATA(h);
2357
88cafda7
DS
2358 if (ns_id)
2359 vrf_id = ns_id;
2360
d9f5b2f5
SW
2361 if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
2362 return 0;
2363
2364 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
2365 if (len < 0) {
2366 zlog_warn(
2367 "%s: Message received from netlink is of a broken size %d %zu",
15569c58 2368 __func__, h->nlmsg_len,
d9f5b2f5
SW
2369 (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
2370 return -1;
2371 }
2372
d9f5b2f5
SW
2373 netlink_parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len);
2374
2375
2376 if (!tb[NHA_ID]) {
2377 flog_warn(
2378 EC_ZEBRA_BAD_NHG_MESSAGE,
2379 "Nexthop group without an ID received from the kernel");
2380 return -1;
2381 }
2382
2383 /* We use the ID key'd nhg table for kernel updates */
2384 id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
d9f5b2f5 2385
e8b0e420 2386 family = nhm->nh_family;
e8b0e420
SW
2387 afi = family2afi(family);
2388
38e40db1
SW
2389 type = proto2zebra(nhm->nh_protocol, 0, true);
2390
fdee485a
SW
2391 if (IS_ZEBRA_DEBUG_KERNEL)
2392 zlog_debug("%s ID (%u) %s NS %u",
2393 nl_msg_type_to_str(h->nlmsg_type), id,
2394 nl_family_to_str(family), ns_id);
2395
2396
d9f5b2f5
SW
2397 if (h->nlmsg_type == RTM_NEWNEXTHOP) {
2398 if (tb[NHA_GROUP]) {
2399 /**
2400 * If this is a group message its only going to have
2401 * an array of nexthop IDs associated with it
2402 */
5a935f79
SW
2403 grp_count = netlink_nexthop_process_group(
2404 tb, grp, array_size(grp));
85f5e761
SW
2405 } else {
2406 if (tb[NHA_BLACKHOLE]) {
2407 /**
2408 * This nexthop is just for blackhole-ing
2409 * traffic, it should not have an OIF, GATEWAY,
2410 * or ENCAP
2411 */
e22e8001
SW
2412 nh.type = NEXTHOP_TYPE_BLACKHOLE;
2413 nh.bh_type = BLACKHOLE_UNSPEC;
2414 } else if (tb[NHA_OIF])
85f5e761
SW
2415 /**
2416 * This is a true new nexthop, so we need
2417 * to parse the gateway and device info
2418 */
2419 nh = netlink_nexthop_process_nh(tb, family,
2420 &ifp, ns_id);
e22e8001
SW
2421 else {
2422
8e401b25
SW
2423 flog_warn(
2424 EC_ZEBRA_BAD_NHG_MESSAGE,
2425 "Invalid Nexthop message received from the kernel with ID (%u)",
2426 id);
2427 return -1;
2428 }
e22e8001
SW
2429 SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
2430 if (nhm->nh_flags & RTNH_F_ONLINK)
2431 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
2432 vrf_id = nh.vrf_id;
d9f5b2f5
SW
2433 }
2434
38e40db1
SW
2435 if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
2436 type, startup))
e22e8001 2437 return -1;
8e401b25 2438
9a1588c4 2439 } else if (h->nlmsg_type == RTM_DELNEXTHOP)
88cafda7 2440 zebra_nhg_kernel_del(id, vrf_id);
d9f5b2f5 2441
d9f5b2f5
SW
2442 return 0;
2443}
2444
2445/**
2446 * netlink_request_nexthop() - Request nextop information from the kernel
2447 * @zns: Zebra namespace
2448 * @family: AF_* netlink family
2449 * @type: RTM_* route type
2450 *
2451 * Return: Result status
2452 */
2453static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
2454{
2455 struct {
2456 struct nlmsghdr n;
2457 struct nhmsg nhm;
2458 } req;
2459
2460 /* Form the request, specifying filter (rtattr) if needed. */
2461 memset(&req, 0, sizeof(req));
2462 req.n.nlmsg_type = type;
2463 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
2464 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2465 req.nhm.nh_family = family;
2466
2467 return netlink_request(&zns->netlink_cmd, &req.n);
2468}
2469
7d5bb02b 2470
d9f5b2f5
SW
2471/**
2472 * netlink_nexthop_read() - Nexthop read function using netlink interface
2473 *
2474 * @zns: Zebra name space
2475 *
2476 * Return: Result status
2477 * Only called at bootstrap time.
2478 */
2479int netlink_nexthop_read(struct zebra_ns *zns)
2480{
2481 int ret;
2482 struct zebra_dplane_info dp_info;
2483
2484 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2485
2486 /* Get nexthop objects */
2487 ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
2488 if (ret < 0)
2489 return ret;
2490 ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
2491 &dp_info, 0, 1);
81505946
SW
2492
2493 if (!ret)
2494 /* If we succesfully read in nexthop objects,
2495 * this kernel must support them.
2496 */
2497 supports_nh = true;
7c99d51b
MS
2498
2499 if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2500 zlog_debug("Nexthop objects %ssupported on this kernel",
2501 supports_nh ? "" : "not ");
81505946 2502
60e0eaee 2503 return ret;
d9f5b2f5
SW
2504}
2505
2506
d62a17ae 2507int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
5895d33f 2508 int llalen, ns_id_t ns_id)
6b8a5694 2509{
d62a17ae 2510 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
5895d33f 2511 addr, lla, llalen, ns_id);
6b8a5694 2512}
718e3744 2513
13d60d35 2514/*
2515 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
2516 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
2517 */
0bbd4ff4
MS
2518static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx,
2519 int cmd)
13d60d35 2520{
f3dbec60 2521 uint8_t protocol = RTPROT_ZEBRA;
d62a17ae 2522 struct {
2523 struct nlmsghdr n;
2524 struct ndmsg ndm;
2525 char buf[256];
2526 } req;
d7c0a89a 2527 uint8_t dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
0bbd4ff4 2528 const struct ipaddr *addr;
d62a17ae 2529
5605ecfc 2530 memset(&req, 0, sizeof(req));
d62a17ae 2531
2532 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2533 req.n.nlmsg_flags = NLM_F_REQUEST;
2534 if (cmd == RTM_NEWNEIGH)
2535 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
2536 req.n.nlmsg_type = cmd;
2537 req.ndm.ndm_family = PF_BRIDGE;
2538 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
fec211ad 2539 req.ndm.ndm_flags |= NTF_SELF; /* Handle by "self", not "master" */
d62a17ae 2540
2541
f3dbec60
DS
2542 addattr_l(&req.n, sizeof(req),
2543 NDA_PROTOCOL, &protocol, sizeof(protocol));
d62a17ae 2544 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
0bbd4ff4 2545 req.ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
13d60d35 2546
0bbd4ff4 2547 addr = dplane_ctx_neigh_get_ipaddr(ctx);
13d60d35 2548
0bbd4ff4 2549 addattr_l(&req.n, sizeof(req), NDA_DST, &(addr->ipaddr_v4), 4);
13d60d35 2550
0bbd4ff4
MS
2551 return netlink_talk_info(netlink_talk_filter, &req.n,
2552 dplane_ctx_get_ns(ctx), 0);
13d60d35 2553}
2554
/*
 * Fallback definition: pointer to the first rtattr that follows the
 * (aligned) struct ndmsg in a neighbor message.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
2559
2414abd3 2560static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 2561{
d62a17ae 2562 struct ndmsg *ndm;
2563 struct interface *ifp;
2564 struct zebra_if *zif;
d62a17ae 2565 struct rtattr *tb[NDA_MAX + 1];
2566 struct interface *br_if;
2567 struct ethaddr mac;
2568 vlanid_t vid = 0;
4b3f26f4 2569 struct in_addr vtep_ip;
d62a17ae 2570 int vid_present = 0, dst_present = 0;
2571 char buf[ETHER_ADDR_STRLEN];
2572 char vid_buf[20];
2573 char dst_buf[30];
a37f4598 2574 bool sticky;
d62a17ae 2575
2576 ndm = NLMSG_DATA(h);
2577
2853fed6 2578 /* We only process macfdb notifications if EVPN is enabled */
2579 if (!is_evpn_enabled())
2580 return 0;
2581
4b3f26f4 2582 /* Parse attributes and extract fields of interest. Do basic
2583 * validation of the fields.
2584 */
2585 memset(tb, 0, sizeof tb);
d62a17ae 2586 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2587
2588 if (!tb[NDA_LLADDR]) {
28bd0652 2589 if (IS_ZEBRA_DEBUG_KERNEL)
4b3f26f4 2590 zlog_debug("%s AF_BRIDGE IF %u - no LLADDR",
28bd0652 2591 nl_msg_type_to_str(h->nlmsg_type),
4b3f26f4 2592 ndm->ndm_ifindex);
d62a17ae 2593 return 0;
2594 }
2595
ff8b7eb8 2596 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
2597 if (IS_ZEBRA_DEBUG_KERNEL)
2598 zlog_debug(
4b3f26f4 2599 "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu",
2600 nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex,
28bd0652 2601 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
d62a17ae 2602 return 0;
2603 }
2604
ff8b7eb8 2605 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 2606
2607 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
2608 vid_present = 1;
d7c0a89a 2609 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
d62a17ae 2610 sprintf(vid_buf, " VLAN %u", vid);
2611 }
2612
2613 if (tb[NDA_DST]) {
2614 /* TODO: Only IPv4 supported now. */
2615 dst_present = 1;
4b3f26f4 2616 memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]),
d62a17ae 2617 IPV4_MAX_BYTELEN);
4b3f26f4 2618 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip));
d62a17ae 2619 }
2620
d62a17ae 2621 if (IS_ZEBRA_DEBUG_KERNEL)
4b3f26f4 2622 zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %s%s",
d62a17ae 2623 nl_msg_type_to_str(h->nlmsg_type),
d62a17ae 2624 ndm->ndm_ifindex, vid_present ? vid_buf : "",
4b3f26f4 2625 ndm->ndm_state, ndm->ndm_flags,
d62a17ae 2626 prefix_mac2str(&mac, buf, sizeof(buf)),
2627 dst_present ? dst_buf : "");
2628
4b3f26f4 2629 /* The interface should exist. */
2630 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
2631 ndm->ndm_ifindex);
2632 if (!ifp || !ifp->info)
2633 return 0;
2634
2635 /* The interface should be something we're interested in. */
2636 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
2637 return 0;
2638
2639 zif = (struct zebra_if *)ifp->info;
2640 if ((br_if = zif->brslave_info.br_if) == NULL) {
2641 if (IS_ZEBRA_DEBUG_KERNEL)
2642 zlog_debug(
2643 "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master",
2644 nl_msg_type_to_str(h->nlmsg_type), ifp->name,
2645 ndm->ndm_ifindex,
2646 zif->brslave_info.bridge_ifindex);
2647 return 0;
2648 }
2649
2650 sticky = !!(ndm->ndm_state & NUD_NOARP);
2651
28bd0652
DS
2652 if (filter_vlan && vid != filter_vlan) {
2653 if (IS_ZEBRA_DEBUG_KERNEL)
d6951e5e 2654 zlog_debug(" Filtered due to filter vlan: %d",
28bd0652 2655 filter_vlan);
d62a17ae 2656 return 0;
28bd0652 2657 }
d62a17ae 2658
2659 /* If add or update, do accordingly if learnt on a "local" interface; if
2660 * the notification is over VxLAN, this has to be related to
2661 * multi-homing,
2662 * so perform an implicit delete of any local entry (if it exists).
2663 */
2664 if (h->nlmsg_type == RTM_NEWNEIGH) {
4b3f26f4 2665 /* Drop "permanent" entries. */
2666 if (ndm->ndm_state & NUD_PERMANENT) {
2667 if (IS_ZEBRA_DEBUG_KERNEL)
d6951e5e
DL
2668 zlog_debug(
2669 " Dropping entry because of NUD_PERMANENT");
2670 return 0;
4b3f26f4 2671 }
2672
d62a17ae 2673 if (IS_ZEBRA_IF_VXLAN(ifp))
2674 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
2675 vid);
2676
2677 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
2678 sticky);
2679 }
2680
2681 /* This is a delete notification.
4b3f26f4 2682 * Ignore the notification with IP dest as it may just signify that the
2683 * MAC has moved from remote to local. The exception is the special
2684 * all-zeros MAC that represents the BUM flooding entry; we may have
2685 * to readd it. Otherwise,
d62a17ae 2686 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
2687 * 2. For a MAC over "local" interface, delete the mac
2688 * Note: We will get notifications from both bridge driver and VxLAN
2689 * driver.
d62a17ae 2690 */
28bd0652 2691 if (dst_present) {
4b3f26f4 2692 u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2693
2694 if (!memcmp(zero_mac, mac.octet, ETH_ALEN))
2695 return zebra_vxlan_check_readd_vtep(ifp, vtep_ip);
d62a17ae 2696 return 0;
28bd0652 2697 }
d62a17ae 2698
2699 if (IS_ZEBRA_IF_VXLAN(ifp))
2700 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
2701 vid);
2702
2703 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
2232a77c 2704}
2705
2414abd3 2706static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 2707{
d62a17ae 2708 int len;
2709 struct ndmsg *ndm;
2232a77c 2710
d62a17ae 2711 if (h->nlmsg_type != RTM_NEWNEIGH)
2712 return 0;
2232a77c 2713
d62a17ae 2714 /* Length validity. */
2715 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2716 if (len < 0)
2717 return -1;
2232a77c 2718
d62a17ae 2719 /* We are interested only in AF_BRIDGE notifications. */
2720 ndm = NLMSG_DATA(h);
2721 if (ndm->ndm_family != AF_BRIDGE)
2722 return 0;
2232a77c 2723
2414abd3 2724 return netlink_macfdb_change(h, len, ns_id);
2232a77c 2725}
2726
2727/* Request for MAC FDB information from the kernel */
85a75f1e
MS
2728static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
2729 int type, ifindex_t master_ifindex)
2232a77c 2730{
d62a17ae 2731 struct {
2732 struct nlmsghdr n;
2733 struct ifinfomsg ifm;
2734 char buf[256];
2735 } req;
2736
2737 /* Form the request, specifying filter (rtattr) if needed. */
2738 memset(&req, 0, sizeof(req));
2739 req.n.nlmsg_type = type;
718f9b0f 2740 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 2741 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
2742 req.ifm.ifi_family = family;
2743 if (master_ifindex)
2744 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
2745
85a75f1e 2746 return netlink_request(netlink_cmd, &req.n);
2232a77c 2747}
2748
2749/*
2750 * MAC forwarding database read using netlink interface. This is invoked
2751 * at startup.
2752 */
d62a17ae 2753int netlink_macfdb_read(struct zebra_ns *zns)
2232a77c 2754{
d62a17ae 2755 int ret;
85a75f1e
MS
2756 struct zebra_dplane_info dp_info;
2757
2758 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 2759
2760 /* Get bridge FDB table. */
85a75f1e
MS
2761 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
2762 0);
d62a17ae 2763 if (ret < 0)
2764 return ret;
2765 /* We are reading entire table. */
2766 filter_vlan = 0;
85a75f1e
MS
2767 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2768 &dp_info, 0, 1);
d62a17ae 2769
2770 return ret;
2232a77c 2771}
2772
2773/*
2774 * MAC forwarding database read using netlink interface. This is for a
2775 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
2776 */
d62a17ae 2777int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
2778 struct interface *br_if)
2232a77c 2779{
d62a17ae 2780 struct zebra_if *br_zif;
2781 struct zebra_if *zif;
2782 struct zebra_l2info_vxlan *vxl;
85a75f1e 2783 struct zebra_dplane_info dp_info;
d62a17ae 2784 int ret = 0;
2785
85a75f1e 2786 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 2787
2788 /* Save VLAN we're filtering on, if needed. */
2789 br_zif = (struct zebra_if *)br_if->info;
2790 zif = (struct zebra_if *)ifp->info;
2791 vxl = &zif->l2info.vxl;
2792 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
2793 filter_vlan = vxl->access_vlan;
2794
2795 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
2796 */
85a75f1e 2797 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
d62a17ae 2798 br_if->ifindex);
2799 if (ret < 0)
2800 return ret;
85a75f1e
MS
2801 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2802 &dp_info, 0, 0);
d62a17ae 2803
2804 /* Reset VLAN filter. */
2805 filter_vlan = 0;
2806 return ret;
2232a77c 2807}
2808
67fb9374
CS
2809
2810/* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
2811static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
2812 int family,
2813 int type,
2814 struct interface *br_if,
2815 struct ethaddr *mac,
2816 vlanid_t vid)
2817{
2818 struct {
2819 struct nlmsghdr n;
2820 struct ndmsg ndm;
2821 char buf[256];
2822 } req;
2823 struct zebra_if *br_zif;
2824 char buf[ETHER_ADDR_STRLEN];
2825
2826 memset(&req, 0, sizeof(req));
2827 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2828 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
2829 req.n.nlmsg_flags = NLM_F_REQUEST;
2830 req.ndm.ndm_family = family; /* AF_BRIDGE */
2831 /* req.ndm.ndm_state = NUD_REACHABLE; */
2832
2833 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2834
2835 br_zif = (struct zebra_if *)br_if->info;
2836 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
2837 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2838
2839 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2840
2841 if (IS_ZEBRA_DEBUG_KERNEL)
15569c58 2842 zlog_debug("%s: Tx family %s IF %s(%u) MAC %s vid %u", __func__,
67fb9374
CS
2843 nl_family_to_str(req.ndm.ndm_family), br_if->name,
2844 br_if->ifindex,
2845 prefix_mac2str(mac, buf, sizeof(buf)), vid);
2846
2847 return netlink_request(&zns->netlink_cmd, &req.n);
2848}
2849
2850int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
2851 struct interface *br_if,
2852 struct ethaddr *mac, vlanid_t vid)
2853{
2854 int ret = 0;
2855 struct zebra_dplane_info dp_info;
2856
2857 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2858
2859 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
2860 */
2861 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
2862 RTM_GETNEIGH,
2863 br_if, mac, vid);
2864 if (ret < 0)
2865 return ret;
2866
2867 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2868 &dp_info, 1, 0);
2869
2870 return ret;
2871}
036d93c0
MS
2872
2873/*
2874 * Netlink-specific handler for MAC updates using dataplane context object.
2875 */
2876static enum zebra_dplane_result
2877netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 2878{
f3dbec60 2879 uint8_t protocol = RTPROT_ZEBRA;
d62a17ae 2880 struct {
2881 struct nlmsghdr n;
2882 struct ndmsg ndm;
2883 char buf[256];
2884 } req;
036d93c0 2885 int ret;
d62a17ae 2886 int dst_alen;
36590076 2887 int vid_present = 0;
036d93c0
MS
2888 int cmd;
2889 struct in_addr vtep_ip;
2890 vlanid_t vid;
2891
2892 if (dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL)
2893 cmd = RTM_NEWNEIGH;
2894 else
2895 cmd = RTM_DELNEIGH;
2896
5605ecfc 2897 memset(&req, 0, sizeof(req));
d62a17ae 2898
2899 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2900 req.n.nlmsg_flags = NLM_F_REQUEST;
2901 if (cmd == RTM_NEWNEIGH)
2902 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2903 req.n.nlmsg_type = cmd;
2904 req.ndm.ndm_family = AF_BRIDGE;
2905 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2906 req.ndm.ndm_state = NUD_REACHABLE;
2907
036d93c0 2908 if (dplane_ctx_mac_is_sticky(ctx))
d62a17ae 2909 req.ndm.ndm_state |= NUD_NOARP;
2910 else
2911 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2912
f3dbec60
DS
2913 addattr_l(&req.n, sizeof(req),
2914 NDA_PROTOCOL, &protocol, sizeof(protocol));
036d93c0
MS
2915 addattr_l(&req.n, sizeof(req), NDA_LLADDR,
2916 dplane_ctx_mac_get_addr(ctx), 6);
478566d6 2917 req.ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
036d93c0 2918
d63c1b18 2919 dst_alen = 4; // TODO: hardcoded
036d93c0 2920 vtep_ip = *(dplane_ctx_mac_get_vtep_ip(ctx));
d63c1b18 2921 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
036d93c0 2922
478566d6
MS
2923 vid = dplane_ctx_mac_get_vlan(ctx);
2924
2925 if (vid > 0) {
d62a17ae 2926 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2927 vid_present = 1;
d62a17ae 2928 }
478566d6
MS
2929 addattr32(&req.n, sizeof(req), NDA_MASTER,
2930 dplane_ctx_mac_get_br_ifindex(ctx));
d62a17ae 2931
036d93c0
MS
2932 if (IS_ZEBRA_DEBUG_KERNEL) {
2933 char ipbuf[PREFIX_STRLEN];
2934 char buf[ETHER_ADDR_STRLEN];
2935 char dst_buf[PREFIX_STRLEN + 10];
478566d6
MS
2936 char vid_buf[20];
2937
2938 if (vid_present)
2939 snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
2940 else
2941 vid_buf[0] = '\0';
036d93c0
MS
2942
2943 inet_ntop(AF_INET, &vtep_ip, ipbuf, sizeof(ipbuf));
2944 snprintf(dst_buf, sizeof(dst_buf), " dst %s", ipbuf);
2945 prefix_mac2str(dplane_ctx_mac_get_addr(ctx), buf, sizeof(buf));
2946
d62a17ae 2947 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2948 nl_msg_type_to_str(cmd),
478566d6
MS
2949 nl_family_to_str(req.ndm.ndm_family),
2950 dplane_ctx_get_ifname(ctx),
2951 dplane_ctx_get_ifindex(ctx), vid_buf,
036d93c0
MS
2952 dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
2953 buf, dst_buf);
2954 }
d62a17ae 2955
036d93c0
MS
2956 ret = netlink_talk_info(netlink_talk_filter, &req.n,
2957 dplane_ctx_get_ns(ctx), 0);
2958 if (ret == 0)
2959 return ZEBRA_DPLANE_REQUEST_SUCCESS;
2960 else
2961 return ZEBRA_DPLANE_REQUEST_FAILURE;
2232a77c 2962}
2963
f17b99ed
DS
2964/*
2965 * In the event the kernel deletes ipv4 link-local neighbor entries created for
2966 * 5549 support, re-install them.
2967 */
2968static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
9b036974
DS
2969 struct interface *ifp, struct ipaddr *ip,
2970 bool handle_failed)
f17b99ed
DS
2971{
2972 if (ndm->ndm_family != AF_INET)
2973 return;
2974
2975 if (!zif->v6_2_v4_ll_neigh_entry)
2976 return;
2977
2978 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
2979 return;
2980
9b036974
DS
2981 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
2982 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
2983 ifp->name);
2984 return;
2985 }
2986
f17b99ed
DS
2987 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
2988}
2989
/* Neighbor states that this file treats as "valid". */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE     \
	 | NUD_DELAY)
2232a77c 2993
2414abd3 2994static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 2995{
d62a17ae 2996 struct ndmsg *ndm;
2997 struct interface *ifp;
2998 struct zebra_if *zif;
d62a17ae 2999 struct rtattr *tb[NDA_MAX + 1];
3000 struct interface *link_if;
3001 struct ethaddr mac;
3002 struct ipaddr ip;
3003 char buf[ETHER_ADDR_STRLEN];
3004 char buf2[INET6_ADDRSTRLEN];
3005 int mac_present = 0;
a37f4598 3006 bool is_ext;
3007 bool is_router;
d62a17ae 3008
3009 ndm = NLMSG_DATA(h);
3010
3011 /* The interface should exist. */
5895d33f 3012 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
d62a17ae 3013 ndm->ndm_ifindex);
2853fed6 3014 if (!ifp || !ifp->info)
d62a17ae 3015 return 0;
3016
20089ae2
DS
3017 zif = (struct zebra_if *)ifp->info;
3018
3019 /* Parse attributes and extract fields of interest. */
0d6f7fd6 3020 memset(tb, 0, sizeof(tb));
20089ae2
DS
3021 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
3022
3023 if (!tb[NDA_DST]) {
9df414fe
QY
3024 zlog_debug("%s family %s IF %s(%u) - no DST",
3025 nl_msg_type_to_str(h->nlmsg_type),
3026 nl_family_to_str(ndm->ndm_family), ifp->name,
3027 ndm->ndm_ifindex);
d62a17ae 3028 return 0;
20089ae2
DS
3029 }
3030
3031 memset(&ip, 0, sizeof(struct ipaddr));
3032 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
3033 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
3034
f17b99ed
DS
3035 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
3036 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
9b036974 3037 netlink_handle_5549(ndm, zif, ifp, &ip, false);
28bd0652
DS
3038 if (IS_ZEBRA_DEBUG_KERNEL)
3039 zlog_debug(
3040 "\tNeighbor Entry Received is a 5549 entry, finished");
20089ae2
DS
3041 return 0;
3042 }
d62a17ae 3043
f17b99ed 3044 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
9b036974
DS
3045 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
3046 netlink_handle_5549(ndm, zif, ifp, &ip, true);
f17b99ed 3047
d62a17ae 3048 /* The neighbor is present on an SVI. From this, we locate the
3049 * underlying
3050 * bridge because we're only interested in neighbors on a VxLAN bridge.
3051 * The bridge is located based on the nature of the SVI:
3052 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
3053 * interface
3054 * and is linked to the bridge
3055 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
3056 * inteface
3057 * itself
3058 */
3059 if (IS_ZEBRA_IF_VLAN(ifp)) {
5895d33f 3060 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
71349e03 3061 zif->link_ifindex);
d62a17ae 3062 if (!link_if)
3063 return 0;
3064 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
3065 link_if = ifp;
28bd0652
DS
3066 else {
3067 if (IS_ZEBRA_DEBUG_KERNEL)
3068 zlog_debug(
3069 "\tNeighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
d62a17ae 3070 return 0;
28bd0652 3071 }
d62a17ae 3072
d62a17ae 3073 memset(&mac, 0, sizeof(struct ethaddr));
d62a17ae 3074 if (h->nlmsg_type == RTM_NEWNEIGH) {
3075 if (tb[NDA_LLADDR]) {
ff8b7eb8 3076 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
3077 if (IS_ZEBRA_DEBUG_KERNEL)
3078 zlog_debug(
3079 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
3080 nl_msg_type_to_str(
3081 h->nlmsg_type),
3082 nl_family_to_str(
3083 ndm->ndm_family),
3084 ifp->name, ndm->ndm_ifindex,
3085 (unsigned long)RTA_PAYLOAD(
3086 tb[NDA_LLADDR]));
d62a17ae 3087 return 0;
3088 }
3089
3090 mac_present = 1;
ff8b7eb8 3091 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 3092 }
3093
a37f4598 3094 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
3095 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
d62a17ae 3096
3097 if (IS_ZEBRA_DEBUG_KERNEL)
3098 zlog_debug(
3099 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
3100 nl_msg_type_to_str(h->nlmsg_type),
3101 nl_family_to_str(ndm->ndm_family), ifp->name,
3102 ndm->ndm_ifindex,
3103 ipaddr2str(&ip, buf2, sizeof(buf2)),
3104 mac_present
3105 ? prefix_mac2str(&mac, buf, sizeof(buf))
3106 : "",
3107 ndm->ndm_state, ndm->ndm_flags);
3108
3109 /* If the neighbor state is valid for use, process as an add or
3110 * update
3111 * else process as a delete. Note that the delete handling may
3112 * result
3113 * in re-adding the neighbor if it is a valid "remote" neighbor.
3114 */
3115 if (ndm->ndm_state & NUD_VALID)
ee69da27 3116 return zebra_vxlan_handle_kernel_neigh_update(
d62a17ae 3117 ifp, link_if, &ip, &mac, ndm->ndm_state,
a37f4598 3118 is_ext, is_router);
d62a17ae 3119
ee69da27 3120 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
d62a17ae 3121 }
3122
3123 if (IS_ZEBRA_DEBUG_KERNEL)
3124 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
3125 nl_msg_type_to_str(h->nlmsg_type),
3126 nl_family_to_str(ndm->ndm_family), ifp->name,
3127 ndm->ndm_ifindex,
3128 ipaddr2str(&ip, buf2, sizeof(buf2)));
3129
3130 /* Process the delete - it may result in re-adding the neighbor if it is
3131 * a valid "remote" neighbor.
3132 */
ee69da27 3133 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
2232a77c 3134}
3135
2414abd3 3136static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 3137{
d62a17ae 3138 int len;
3139 struct ndmsg *ndm;
2232a77c 3140
d62a17ae 3141 if (h->nlmsg_type != RTM_NEWNEIGH)
3142 return 0;
2232a77c 3143
d62a17ae 3144 /* Length validity. */
3145 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
3146 if (len < 0)
3147 return -1;
2232a77c 3148
d62a17ae 3149 /* We are interested only in AF_INET or AF_INET6 notifications. */
3150 ndm = NLMSG_DATA(h);
3151 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
3152 return 0;
2232a77c 3153
2414abd3 3154 return netlink_neigh_change(h, len);
2232a77c 3155}
3156
3157/* Request for IP neighbor information from the kernel */
85a75f1e
MS
3158static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
3159 int type, ifindex_t ifindex)
2232a77c 3160{
d62a17ae 3161 struct {
3162 struct nlmsghdr n;
3163 struct ndmsg ndm;
3164 char buf[256];
3165 } req;
3166
3167 /* Form the request, specifying filter (rtattr) if needed. */
3168 memset(&req, 0, sizeof(req));
3169 req.n.nlmsg_type = type;
718f9b0f 3170 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 3171 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3172 req.ndm.ndm_family = family;
3173 if (ifindex)
3174 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
3175
85a75f1e 3176 return netlink_request(netlink_cmd, &req.n);
2232a77c 3177}
3178
3179/*
3180 * IP Neighbor table read using netlink interface. This is invoked
3181 * at startup.
3182 */
d62a17ae 3183int netlink_neigh_read(struct zebra_ns *zns)
2232a77c 3184{
d62a17ae 3185 int ret;
85a75f1e
MS
3186 struct zebra_dplane_info dp_info;
3187
3188 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 3189
d62a17ae 3190 /* Get IP neighbor table. */
85a75f1e
MS
3191 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
3192 0);
d62a17ae 3193 if (ret < 0)
3194 return ret;
85a75f1e
MS
3195 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3196 &dp_info, 0, 1);
2232a77c 3197
d62a17ae 3198 return ret;
2232a77c 3199}
3200
3201/*
3202 * IP Neighbor table read using netlink interface. This is for a specific
3203 * VLAN device.
3204 */
d62a17ae 3205int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2232a77c 3206{
d62a17ae 3207 int ret = 0;
85a75f1e
MS
3208 struct zebra_dplane_info dp_info;
3209
3210 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 3211
85a75f1e 3212 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
d62a17ae 3213 vlan_if->ifindex);
3214 if (ret < 0)
3215 return ret;
85a75f1e
MS
3216 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3217 &dp_info, 0, 0);
2232a77c 3218
d62a17ae 3219 return ret;
2232a77c 3220}
3221
67fb9374
CS
3222/*
3223 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
3224 * read using netlink interface.
3225 */
3226static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
3227 int type, struct ipaddr *ip,
3228 ifindex_t ifindex)
3229{
3230 struct {
3231 struct nlmsghdr n;
3232 struct ndmsg ndm;
3233 char buf[256];
3234 } req;
3235 int ipa_len;
3236
3237 /* Form the request, specifying filter (rtattr) if needed. */
3238 memset(&req, 0, sizeof(req));
3239 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3240 req.n.nlmsg_flags = NLM_F_REQUEST;
3241 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
3242 req.ndm.ndm_ifindex = ifindex;
3243
3244 if (IS_IPADDR_V4(ip)) {
3245 ipa_len = IPV4_MAX_BYTELEN;
3246 req.ndm.ndm_family = AF_INET;
3247
3248 } else {
3249 ipa_len = IPV6_MAX_BYTELEN;
3250 req.ndm.ndm_family = AF_INET6;
3251 }
3252
3253 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
3254
7c26c121
CS
3255 if (IS_ZEBRA_DEBUG_KERNEL) {
3256 char buf[INET6_ADDRSTRLEN];
3257
3258 zlog_debug("%s: Tx %s family %s IF %u IP %s flags 0x%x",
3259 __func__, nl_msg_type_to_str(type),
3260 nl_family_to_str(req.ndm.ndm_family), ifindex,
3261 ipaddr2str(ip, buf, sizeof(buf)), req.n.nlmsg_flags);
3262 }
3263
67fb9374
CS
3264 return netlink_request(&zns->netlink_cmd, &req.n);
3265}
3266
3267int netlink_neigh_read_specific_ip(struct ipaddr *ip,
3268 struct interface *vlan_if)
3269{
3270 int ret = 0;
3271 struct zebra_ns *zns;
3272 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vlan_if->vrf_id);
3273 char buf[INET6_ADDRSTRLEN];
3274 struct zebra_dplane_info dp_info;
3275
3276 zns = zvrf->zns;
3277
3278 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3279
3280 if (IS_ZEBRA_DEBUG_KERNEL)
3281 zlog_debug("%s: neigh request IF %s(%u) IP %s vrf_id %u",
15569c58
DA
3282 __func__, vlan_if->name, vlan_if->ifindex,
3283 ipaddr2str(ip, buf, sizeof(buf)), vlan_if->vrf_id);
67fb9374
CS
3284
3285 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
3286 vlan_if->ifindex);
3287 if (ret < 0)
3288 return ret;
3289
3290 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
3291 &dp_info, 1, 0);
3292
3293 return ret;
3294}
3295
2414abd3 3296int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
2232a77c 3297{
d62a17ae 3298 int len;
3299 struct ndmsg *ndm;
2232a77c 3300
d62a17ae 3301 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
3302 return 0;
2232a77c 3303
d62a17ae 3304 /* Length validity. */
3305 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
9bdf8618 3306 if (len < 0) {
15569c58
DA
3307 zlog_err(
3308 "%s: Message received from netlink is of a broken size %d %zu",
3309 __func__, h->nlmsg_len,
3310 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
d62a17ae 3311 return -1;
9bdf8618 3312 }
2232a77c 3313
d62a17ae 3314 /* Is this a notification for the MAC FDB or IP neighbor table? */
3315 ndm = NLMSG_DATA(h);
3316 if (ndm->ndm_family == AF_BRIDGE)
2414abd3 3317 return netlink_macfdb_change(h, len, ns_id);
2232a77c 3318
d62a17ae 3319 if (ndm->ndm_type != RTN_UNICAST)
3320 return 0;
2232a77c 3321
d62a17ae 3322 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2414abd3 3323 return netlink_ipneigh_change(h, len, ns_id);
8a1b681c 3324 else {
9df414fe 3325 flog_warn(
e914ccbe 3326 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
3327 "Invalid address family: %u received from kernel neighbor change: %s",
3328 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
3329 return 0;
3330 }
2232a77c 3331
d62a17ae 3332 return 0;
2232a77c 3333}
3334
931fa60c
MS
3335/*
3336 * Utility neighbor-update function, using info from dplane context.
3337 */
3338static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
3339 int cmd)
2232a77c 3340{
f3dbec60 3341 uint8_t protocol = RTPROT_ZEBRA;
d62a17ae 3342 struct {
3343 struct nlmsghdr n;
3344 struct ndmsg ndm;
3345 char buf[256];
3346 } req;
3347 int ipa_len;
d62a17ae 3348 char buf[INET6_ADDRSTRLEN];
3349 char buf2[ETHER_ADDR_STRLEN];
931fa60c
MS
3350 const struct ipaddr *ip;
3351 const struct ethaddr *mac;
3352 uint8_t flags;
3353 uint16_t state;
d62a17ae 3354
5605ecfc 3355 memset(&req, 0, sizeof(req));
d62a17ae 3356
931fa60c
MS
3357 ip = dplane_ctx_neigh_get_ipaddr(ctx);
3358 mac = dplane_ctx_neigh_get_mac(ctx);
3359 if (is_zero_mac(mac))
3360 mac = NULL;
3361
3362 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
3363 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
3364
d62a17ae 3365 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3366 req.n.nlmsg_flags = NLM_F_REQUEST;
3367 if (cmd == RTM_NEWNEIGH)
3368 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
3369 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
3370 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
68e33151 3371 req.ndm.ndm_state = state;
931fa60c 3372 req.ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
d62a17ae 3373 req.ndm.ndm_type = RTN_UNICAST;
68e33151 3374 req.ndm.ndm_flags = flags;
d62a17ae 3375
f3dbec60
DS
3376 addattr_l(&req.n, sizeof(req),
3377 NDA_PROTOCOL, &protocol, sizeof(protocol));
d62a17ae 3378 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
3379 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
3380 if (mac)
3381 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
3382
3383 if (IS_ZEBRA_DEBUG_KERNEL)
6fe2b0e6 3384 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s flags 0x%x state 0x%x",
d62a17ae 3385 nl_msg_type_to_str(cmd),
931fa60c
MS
3386 nl_family_to_str(req.ndm.ndm_family),
3387 dplane_ctx_get_ifname(ctx),
3388 dplane_ctx_get_ifindex(ctx),
3389 ipaddr2str(ip, buf, sizeof(buf)),
d62a17ae 3390 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
931fa60c
MS
3391 : "null",
3392 flags, state);
d62a17ae 3393
931fa60c
MS
3394 return netlink_talk_info(netlink_talk_filter, &req.n,
3395 dplane_ctx_get_ns(ctx), 0);
2232a77c 3396}
3397
036d93c0
MS
3398/*
3399 * Update MAC, using dataplane context object.
3400 */
3401enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 3402{
036d93c0 3403 return netlink_macfdb_update_ctx(ctx);
2232a77c 3404}
3405
931fa60c 3406enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 3407{
931fa60c 3408 int ret = -1;
2232a77c 3409
931fa60c
MS
3410 switch (dplane_ctx_get_op(ctx)) {
3411 case DPLANE_OP_NEIGH_INSTALL:
3412 case DPLANE_OP_NEIGH_UPDATE:
3413 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH);
3414 break;
3415 case DPLANE_OP_NEIGH_DELETE:
3416 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH);
3417 break;
0bbd4ff4
MS
3418 case DPLANE_OP_VTEP_ADD:
3419 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH);
3420 break;
3421 case DPLANE_OP_VTEP_DELETE:
3422 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH);
3423 break;
931fa60c
MS
3424 default:
3425 break;
3426 }
2232a77c 3427
931fa60c
MS
3428 return (ret == 0 ?
3429 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
6fe2b0e6
CS
3430}
3431
16c628de
MS
3432/*
3433 * MPLS label forwarding table change via netlink interface, using dataplane
3434 * context information.
3435 */
fc608372 3436int netlink_mpls_multipath(int cmd, struct zebra_dplane_ctx *ctx)
16c628de
MS
3437{
3438 mpls_lse_t lse;
81793ac1 3439 const zebra_nhlfe_t *nhlfe;
16c628de
MS
3440 struct nexthop *nexthop = NULL;
3441 unsigned int nexthop_num;
3442 const char *routedesc;
3443 int route_type;
9a0132a5 3444 struct prefix p = {0};
16c628de
MS
3445
3446 struct {
3447 struct nlmsghdr n;
3448 struct rtmsg r;
3449 char buf[NL_PKT_BUF_SIZE];
3450 } req;
3451
3452 memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE);
3453
3454 /*
3455 * Count # nexthops so we can decide whether to use singlepath
3456 * or multipath case.
3457 */
3458 nexthop_num = 0;
3459 for (nhlfe = dplane_ctx_get_nhlfe(ctx); nhlfe; nhlfe = nhlfe->next) {
3460 nexthop = nhlfe->nexthop;
3461 if (!nexthop)
3462 continue;
3463 if (cmd == RTM_NEWROUTE) {
3464 /* Count all selected NHLFEs */
3465 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3466 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
3467 nexthop_num++;
3468 } else { /* DEL */
3469 /* Count all installed NHLFEs */
3470 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
3471 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
3472 nexthop_num++;
3473 }
3474 }
3475
3476 if ((nexthop_num == 0) ||
3477 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
3478 return 0;
3479
3480 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
3481 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
3482 req.n.nlmsg_type = cmd;
3483 req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
3484
3485 req.r.rtm_family = AF_MPLS;
3486 req.r.rtm_table = RT_TABLE_MAIN;
3487 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
3488 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
3489 req.r.rtm_type = RTN_UNICAST;
3490
3491 if (cmd == RTM_NEWROUTE) {
3492 /* We do a replace to handle update. */
3493 req.n.nlmsg_flags |= NLM_F_REPLACE;
3494
3495 /* set the protocol value if installing */
3496 route_type = re_type_from_lsp_type(
3497 dplane_ctx_get_best_nhlfe(ctx)->type);
3498 req.r.rtm_protocol = zebra2proto(route_type);
3499 }
3500
3501 /* Fill destination */
3502 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
3503 addattr_l(&req.n, sizeof(req), RTA_DST, &lse, sizeof(mpls_lse_t));
3504
3505 /* Fill nexthops (paths) based on single-path or multipath. The paths
3506 * chosen depend on the operation.
3507 */
fc608372 3508 if (nexthop_num == 1) {
16c628de
MS
3509 routedesc = "single-path";
3510 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
3511 routedesc);
3512
3513 nexthop_num = 0;
3514 for (nhlfe = dplane_ctx_get_nhlfe(ctx);
3515 nhlfe; nhlfe = nhlfe->next) {
3516 nexthop = nhlfe->nexthop;
3517 if (!nexthop)
3518 continue;
3519
3520 if ((cmd == RTM_NEWROUTE
3521 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3522 && CHECK_FLAG(nexthop->flags,
3523 NEXTHOP_FLAG_ACTIVE)))
3524 || (cmd == RTM_DELROUTE
3525 && (CHECK_FLAG(nhlfe->flags,
3526 NHLFE_FLAG_INSTALLED)
3527 && CHECK_FLAG(nexthop->flags,
3528 NEXTHOP_FLAG_FIB)))) {
3529 /* Add the gateway */
3530 _netlink_mpls_build_singlepath(
9a0132a5 3531 &p, routedesc, nhlfe, &req.n, &req.r,
16c628de
MS
3532 sizeof(req), cmd);
3533
3534 nexthop_num++;
3535 break;
3536 }
3537 }
3538 } else { /* Multipath case */
3539 char buf[NL_PKT_BUF_SIZE];
3540 struct rtattr *rta = (void *)buf;
3541 struct rtnexthop *rtnh;
81793ac1 3542 const union g_addr *src1 = NULL;
16c628de
MS
3543
3544 rta->rta_type = RTA_MULTIPATH;
3545 rta->rta_len = RTA_LENGTH(0);
3546 rtnh = RTA_DATA(rta);
3547
3548 routedesc = "multipath";
3549 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
3550 routedesc);
3551
3552 nexthop_num = 0;
3553 for (nhlfe = dplane_ctx_get_nhlfe(ctx);
3554 nhlfe; nhlfe = nhlfe->next) {
3555 nexthop = nhlfe->nexthop;
3556 if (!nexthop)
3557 continue;
3558
16c628de
MS
3559 if ((cmd == RTM_NEWROUTE
3560 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3561 && CHECK_FLAG(nexthop->flags,
3562 NEXTHOP_FLAG_ACTIVE)))
3563 || (cmd == RTM_DELROUTE
3564 && (CHECK_FLAG(nhlfe->flags,
3565 NHLFE_FLAG_INSTALLED)
3566 && CHECK_FLAG(nexthop->flags,
3567 NEXTHOP_FLAG_FIB)))) {
3568 nexthop_num++;
3569
3570 /* Build the multipath */
9a0132a5
DS
3571 _netlink_mpls_build_multipath(&p, routedesc,
3572 nhlfe, rta, rtnh,
3573 &req.r, &src1);
16c628de
MS
3574 rtnh = RTNH_NEXT(rtnh);
3575 }
3576 }
3577
3578 /* Add the multipath */
3579 if (rta->rta_len > RTA_LENGTH(0))
3580 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
3581 RTA_DATA(rta), RTA_PAYLOAD(rta));
3582 }
3583
3584 /* Talk to netlink socket. */
3585 return netlink_talk_info(netlink_talk_filter, &req.n,
3586 dplane_ctx_get_ns(ctx), 0);
3587}
ddfeb486 3588#endif /* HAVE_NETLINK */