]> git.proxmox.com Git - mirror_frr.git/blame - zebra/rt_netlink.c
bgpd: Make sure we can use `no bgp listen range ...`
[mirror_frr.git] / zebra / rt_netlink.c
CommitLineData
718e3744 1/* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
718e3744 19 */
20
21#include <zebra.h>
ddfeb486
DL
22
23#ifdef HAVE_NETLINK
24
8ccc7e80 25#include <net/if_arp.h>
ba777396
RW
26#include <linux/lwtunnel.h>
27#include <linux/mpls_iptunnel.h>
28#include <linux/neighbour.h>
29#include <linux/rtnetlink.h>
718e3744 30
31/* Hack for GNU libc version 2. */
32#ifndef MSG_TRUNC
33#define MSG_TRUNC 0x20
34#endif /* MSG_TRUNC */
35
36#include "linklist.h"
37#include "if.h"
38#include "log.h"
39#include "prefix.h"
40#include "connected.h"
41#include "table.h"
26e2ae36 42#include "memory.h"
4a1ab8e4 43#include "zebra_memory.h"
718e3744 44#include "rib.h"
e04ab74d 45#include "thread.h"
edd7c245 46#include "privs.h"
fb018d25 47#include "nexthop.h"
78104b9b 48#include "vrf.h"
5e6a74d8 49#include "vty.h"
40c7bdb0 50#include "mpls.h"
13d60d35 51#include "vxlan.h"
718e3744 52
bf094f69 53#include "zebra/zapi_msg.h"
fe18ee2d 54#include "zebra/zebra_ns.h"
7c551956 55#include "zebra/zebra_vrf.h"
6621ca86 56#include "zebra/rt.h"
718e3744 57#include "zebra/redistribute.h"
58#include "zebra/interface.h"
59#include "zebra/debug.h"
12f6fb97 60#include "zebra/rtadv.h"
567b877d 61#include "zebra/zebra_ptm.h"
40c7bdb0 62#include "zebra/zebra_mpls.h"
1fdc9eae 63#include "zebra/kernel_netlink.h"
64#include "zebra/rt_netlink.h"
e3be0432 65#include "zebra/zebra_mroute.h"
2232a77c 66#include "zebra/zebra_vxlan.h"
364fed6b 67#include "zebra/zebra_errors.h"
e3be0432 68
40c7bdb0 69#ifndef AF_MPLS
70#define AF_MPLS 28
71#endif
72
2232a77c 73static vlanid_t filter_vlan = 0;
74
d62a17ae 75struct gw_family_t {
d7c0a89a
QY
76 uint16_t filler;
77 uint16_t family;
d62a17ae 78 union g_addr gate;
40c7bdb0 79};
80
8755598a
DS
char ipv4_ll_buf[16] = "169.254.0.1";
struct in_addr ipv4_ll;

/*
 * ipv4_ll is the gateway address used for every "5549" style addition to
 * the kernel (an IPv4 route whose nexthop is IPv6).  Convert the textual
 * form once here instead of on every install/remove of such a route.
 */
void rt_netlink_init(void)
{
	const char *ll_text = ipv4_ll_buf;

	inet_pton(AF_INET, ll_text, &ipv4_ll);
}
94
931fa60c
MS
95/*
96 * Mapping from dataplane neighbor flags to netlink flags
97 */
98static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
99{
100 uint8_t flags = 0;
101
102 if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
103 flags |= NTF_EXT_LEARNED;
104 if (dplane_flags & DPLANE_NTF_ROUTER)
105 flags |= NTF_ROUTER;
106
107 return flags;
108}
109
110/*
111 * Mapping from dataplane neighbor state to netlink state
112 */
113static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
114{
115 uint16_t state = 0;
116
117 if (dplane_state & DPLANE_NUD_REACHABLE)
118 state |= NUD_REACHABLE;
119 if (dplane_state & DPLANE_NUD_STALE)
120 state |= NUD_STALE;
121 if (dplane_state & DPLANE_NUD_NOARP)
122 state |= NUD_NOARP;
123 if (dplane_state & DPLANE_NUD_PROBE)
124 state |= NUD_PROBE;
125
126 return state;
127}
128
129
23b1f334
DD
130static inline int is_selfroute(int proto)
131{
d62a17ae 132 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
d4d71f11 133 || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
d62a17ae 134 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
135 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
915902cb 136 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
0761368a 137 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
da82f6b4 138 || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)) {
d62a17ae 139 return 1;
140 }
141
142 return 0;
23b1f334
DD
143}
144
915902cb 145static inline int zebra2proto(int proto)
23b1f334 146{
d62a17ae 147 switch (proto) {
148 case ZEBRA_ROUTE_BABEL:
149 proto = RTPROT_BABEL;
150 break;
151 case ZEBRA_ROUTE_BGP:
152 proto = RTPROT_BGP;
153 break;
154 case ZEBRA_ROUTE_OSPF:
155 case ZEBRA_ROUTE_OSPF6:
156 proto = RTPROT_OSPF;
157 break;
158 case ZEBRA_ROUTE_STATIC:
d4d71f11 159 proto = RTPROT_ZSTATIC;
d62a17ae 160 break;
161 case ZEBRA_ROUTE_ISIS:
162 proto = RTPROT_ISIS;
163 break;
164 case ZEBRA_ROUTE_RIP:
165 proto = RTPROT_RIP;
166 break;
167 case ZEBRA_ROUTE_RIPNG:
168 proto = RTPROT_RIPNG;
169 break;
170 case ZEBRA_ROUTE_NHRP:
171 proto = RTPROT_NHRP;
172 break;
173 case ZEBRA_ROUTE_EIGRP:
174 proto = RTPROT_EIGRP;
175 break;
176 case ZEBRA_ROUTE_LDP:
177 proto = RTPROT_LDP;
178 break;
8a71d93d
DS
179 case ZEBRA_ROUTE_SHARP:
180 proto = RTPROT_SHARP;
181 break;
0761368a
DS
182 case ZEBRA_ROUTE_PBR:
183 proto = RTPROT_PBR;
184 break;
da82f6b4
CF
185 case ZEBRA_ROUTE_OPENFABRIC:
186 proto = RTPROT_OPENFABRIC;
187 break;
a56ec5c0
DS
188 case ZEBRA_ROUTE_TABLE:
189 proto = RTPROT_ZEBRA;
190 break;
d62a17ae 191 default:
0761368a
DS
192 /*
193 * When a user adds a new protocol this will show up
194 * to let them know to do something about it. This
195 * is intentionally a warn because we should see
196 * this as part of development of a new protocol
197 */
9df414fe
QY
198 zlog_debug(
199 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
200 __PRETTY_FUNCTION__, proto);
d62a17ae 201 proto = RTPROT_ZEBRA;
202 break;
203 }
204
205 return proto;
23b1f334
DD
206}
207
915902cb
DS
208static inline int proto2zebra(int proto, int family)
209{
210 switch (proto) {
211 case RTPROT_BABEL:
212 proto = ZEBRA_ROUTE_BABEL;
213 break;
214 case RTPROT_BGP:
215 proto = ZEBRA_ROUTE_BGP;
216 break;
217 case RTPROT_OSPF:
996c9314
LB
218 proto = (family == AFI_IP) ? ZEBRA_ROUTE_OSPF
219 : ZEBRA_ROUTE_OSPF6;
915902cb
DS
220 break;
221 case RTPROT_ISIS:
222 proto = ZEBRA_ROUTE_ISIS;
223 break;
224 case RTPROT_RIP:
225 proto = ZEBRA_ROUTE_RIP;
226 break;
227 case RTPROT_RIPNG:
228 proto = ZEBRA_ROUTE_RIPNG;
229 break;
230 case RTPROT_NHRP:
231 proto = ZEBRA_ROUTE_NHRP;
232 break;
233 case RTPROT_EIGRP:
234 proto = ZEBRA_ROUTE_EIGRP;
235 break;
236 case RTPROT_LDP:
237 proto = ZEBRA_ROUTE_LDP;
238 break;
239 case RTPROT_STATIC:
d4d71f11 240 case RTPROT_ZSTATIC:
915902cb
DS
241 proto = ZEBRA_ROUTE_STATIC;
242 break;
0761368a
DS
243 case RTPROT_SHARP:
244 proto = ZEBRA_ROUTE_SHARP;
245 break;
246 case RTPROT_PBR:
247 proto = ZEBRA_ROUTE_PBR;
248 break;
da82f6b4
CF
249 case RTPROT_OPENFABRIC:
250 proto = ZEBRA_ROUTE_OPENFABRIC;
251 break;
915902cb 252 default:
0761368a
DS
253 /*
254 * When a user adds a new protocol this will show up
255 * to let them know to do something about it. This
256 * is intentionally a warn because we should see
257 * this as part of development of a new protocol
258 */
9df414fe
QY
259 zlog_debug(
260 "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
261 __PRETTY_FUNCTION__, proto);
915902cb
DS
262 proto = ZEBRA_ROUTE_KERNEL;
263 break;
264 }
265 return proto;
266}
267
12f6fb97
DS
268/*
269Pending: create an efficient table_id (in a tree/hash) based lookup)
270 */
d7c0a89a 271static vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
12f6fb97 272{
d62a17ae 273 struct vrf *vrf;
274 struct zebra_vrf *zvrf;
12f6fb97 275
a2addae8 276 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
78dd30b2
PG
277 zvrf = vrf->info;
278 if (zvrf == NULL)
d62a17ae 279 continue;
78dd30b2
PG
280 /* case vrf with netns : match the netnsid */
281 if (vrf_is_backend_netns()) {
282 if (ns_id == zvrf_id(zvrf))
283 return zvrf_id(zvrf);
284 } else {
285 /* VRF is VRF_BACKEND_VRF_LITE */
286 if (zvrf->table_id != table_id)
287 continue;
288 return zvrf_id(zvrf);
289 }
d62a17ae 290 }
12f6fb97 291
d62a17ae 292 return VRF_DEFAULT;
12f6fb97
DS
293}
294
87da6a60
SW
295/**
296 * @parse_encap_mpls() - Parses encapsulated mpls attributes
297 * @tb: Pointer to rtattr to look for nested items in.
298 * @labels: Pointer to store labels in.
299 *
300 * Return: Number of mpls labels found.
301 */
302static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
303{
304 struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
305 mpls_lse_t *lses = NULL;
306 int num_labels = 0;
307 uint32_t ttl = 0;
308 uint32_t bos = 0;
309 uint32_t exp = 0;
310 mpls_label_t label = 0;
311
312 netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
313 lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
314 while (!bos && num_labels < MPLS_MAX_LABELS) {
315 mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
316 labels[num_labels++] = label;
317 }
318
319 return num_labels;
320}
321
718e3744 322/* Looking up routing table by netlink interface. */
2414abd3 323static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
d62a17ae 324 int startup)
718e3744 325{
d62a17ae 326 int len;
327 struct rtmsg *rtm;
328 struct rtattr *tb[RTA_MAX + 1];
d7c0a89a 329 uint8_t flags = 0;
d62a17ae 330 struct prefix p;
792fa92e 331 struct prefix_ipv6 src_p = {};
78dd30b2 332 vrf_id_t vrf_id;
d62a17ae 333
334 char anyaddr[16] = {0};
335
915902cb 336 int proto = ZEBRA_ROUTE_KERNEL;
d62a17ae 337 int index = 0;
338 int table;
339 int metric = 0;
d7c0a89a 340 uint32_t mtu = 0;
25715c7e 341 uint8_t distance = 0;
4e40b6d6 342 route_tag_t tag = 0;
d62a17ae 343
344 void *dest = NULL;
345 void *gate = NULL;
346 void *prefsrc = NULL; /* IPv4 preferred source host address */
347 void *src = NULL; /* IPv6 srcdest source prefix */
e655a03c 348 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
d62a17ae 349
87da6a60
SW
350 /* MPLS labels */
351 mpls_label_t labels[MPLS_MAX_LABELS] = {0};
352 int num_labels = 0;
353
d62a17ae 354 rtm = NLMSG_DATA(h);
355
356 if (startup && h->nlmsg_type != RTM_NEWROUTE)
357 return 0;
e655a03c
DL
358 switch (rtm->rtm_type) {
359 case RTN_UNICAST:
360 break;
361 case RTN_BLACKHOLE:
362 bh_type = BLACKHOLE_NULL;
363 break;
364 case RTN_UNREACHABLE:
365 bh_type = BLACKHOLE_REJECT;
366 break;
367 case RTN_PROHIBIT:
368 bh_type = BLACKHOLE_ADMINPROHIB;
369 break;
370 default:
8c8f250b
DS
371 if (IS_ZEBRA_DEBUG_KERNEL)
372 zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
373 nl_rttype_to_str(rtm->rtm_type),
374 rtm->rtm_type);
d62a17ae 375 return 0;
e655a03c 376 }
d62a17ae 377
378 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618
DS
379 if (len < 0) {
380 zlog_err("%s: Message received from netlink is of a broken size %d %zu",
381 __PRETTY_FUNCTION__, h->nlmsg_len,
382 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 383 return -1;
9bdf8618 384 }
d62a17ae 385
386 memset(tb, 0, sizeof tb);
387 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
388
389 if (rtm->rtm_flags & RTM_F_CLONED)
390 return 0;
391 if (rtm->rtm_protocol == RTPROT_REDIRECT)
392 return 0;
393 if (rtm->rtm_protocol == RTPROT_KERNEL)
394 return 0;
395
396 if (!startup && is_selfroute(rtm->rtm_protocol)
6ab5222f
DS
397 && h->nlmsg_type == RTM_NEWROUTE) {
398 if (IS_ZEBRA_DEBUG_KERNEL)
399 zlog_debug("Route type: %d Received that we think we have originated, ignoring",
400 rtm->rtm_protocol);
d62a17ae 401 return 0;
6ab5222f 402 }
d62a17ae 403
404 /* We don't care about change notifications for the MPLS table. */
405 /* TODO: Revisit this. */
406 if (rtm->rtm_family == AF_MPLS)
407 return 0;
408
409 /* Table corresponding to route. */
410 if (tb[RTA_TABLE])
411 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
412 else
413 table = rtm->rtm_table;
414
415 /* Map to VRF */
78dd30b2 416 vrf_id = vrf_lookup_by_table(table, ns_id);
d62a17ae 417 if (vrf_id == VRF_DEFAULT) {
418 if (!is_zebra_valid_kernel_table(table)
419 && !is_zebra_main_routing_table(table))
420 return 0;
421 }
422
423 /* Route which inserted by Zebra. */
915902cb 424 if (is_selfroute(rtm->rtm_protocol)) {
d62a17ae 425 flags |= ZEBRA_FLAG_SELFROUTE;
915902cb
DS
426 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
427 }
d62a17ae 428 if (tb[RTA_OIF])
429 index = *(int *)RTA_DATA(tb[RTA_OIF]);
430
431 if (tb[RTA_DST])
432 dest = RTA_DATA(tb[RTA_DST]);
433 else
434 dest = anyaddr;
435
436 if (tb[RTA_SRC])
437 src = RTA_DATA(tb[RTA_SRC]);
438 else
439 src = anyaddr;
440
441 if (tb[RTA_PREFSRC])
442 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
443
444 if (tb[RTA_GATEWAY])
445 gate = RTA_DATA(tb[RTA_GATEWAY]);
446
f19435a8
DS
447 if (tb[RTA_PRIORITY])
448 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
d62a17ae 449
4e40b6d6
KK
450#if defined(SUPPORT_REALMS)
451 if (tb[RTA_FLOW])
452 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
453#endif
454
f19435a8
DS
455 if (tb[RTA_METRICS]) {
456 struct rtattr *mxrta[RTAX_MAX + 1];
d62a17ae 457
f19435a8 458 memset(mxrta, 0, sizeof mxrta);
996c9314 459 netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
f19435a8 460 RTA_PAYLOAD(tb[RTA_METRICS]));
d62a17ae 461
f19435a8 462 if (mxrta[RTAX_MTU])
d7c0a89a 463 mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
d62a17ae 464 }
465
466 if (rtm->rtm_family == AF_INET) {
467 p.family = AF_INET;
930571d2 468 if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
e17d9b2d 469 zlog_err(
75829703 470 "Invalid destination prefix length: %u received from kernel route change",
930571d2 471 rtm->rtm_dst_len);
e17d9b2d 472 return -1;
930571d2 473 }
d62a17ae 474 memcpy(&p.u.prefix4, dest, 4);
475 p.prefixlen = rtm->rtm_dst_len;
476
1f610a1f
CF
477 if (rtm->rtm_src_len != 0) {
478 char buf[PREFIX_STRLEN];
9df414fe 479 flog_warn(
e914ccbe 480 EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
9df414fe
QY
481 "unsupported IPv4 sourcedest route (dest %s vrf %u)",
482 prefix2str(&p, buf, sizeof(buf)), vrf_id);
1f610a1f
CF
483 return 0;
484 }
930571d2 485
1f610a1f
CF
486 /* Force debug below to not display anything for source */
487 src_p.prefixlen = 0;
d62a17ae 488 } else if (rtm->rtm_family == AF_INET6) {
489 p.family = AF_INET6;
930571d2 490 if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
e17d9b2d 491 zlog_err(
75829703 492 "Invalid destination prefix length: %u received from kernel route change",
930571d2 493 rtm->rtm_dst_len);
e17d9b2d 494 return -1;
930571d2 495 }
d62a17ae 496 memcpy(&p.u.prefix6, dest, 16);
497 p.prefixlen = rtm->rtm_dst_len;
498
499 src_p.family = AF_INET6;
930571d2 500 if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
e17d9b2d 501 zlog_err(
75829703 502 "Invalid source prefix length: %u received from kernel route change",
930571d2 503 rtm->rtm_src_len);
e17d9b2d 504 return -1;
930571d2 505 }
d62a17ae 506 memcpy(&src_p.prefix, src, 16);
507 src_p.prefixlen = rtm->rtm_src_len;
508 }
509
25715c7e
DS
510 /*
511 * For ZEBRA_ROUTE_KERNEL types:
512 *
513 * The metric/priority of the route received from the kernel
514 * is a 32 bit number. We are going to interpret the high
515 * order byte as the Admin Distance and the low order 3 bytes
516 * as the metric.
517 *
518 * This will allow us to do two things:
519 * 1) Allow the creation of kernel routes that can be
520 * overridden by zebra.
521 * 2) Allow the old behavior for 'most' kernel route types
522 * if a user enters 'ip route ...' v4 routes get a metric
523 * of 0 and v6 routes get a metric of 1024. Both of these
524 * values will end up with a admin distance of 0, which
525 * will cause them to win for the purposes of zebra.
526 */
527 if (proto == ZEBRA_ROUTE_KERNEL) {
528 distance = (metric >> 24) & 0xFF;
996c9314 529 metric = (metric & 0x00FFFFFF);
25715c7e
DS
530 }
531
d62a17ae 532 if (IS_ZEBRA_DEBUG_KERNEL) {
533 char buf[PREFIX_STRLEN];
534 char buf2[PREFIX_STRLEN];
45df4e96 535 zlog_debug("%s %s%s%s vrf %u(%u) metric: %d Admin Distance: %d",
996c9314
LB
536 nl_msg_type_to_str(h->nlmsg_type),
537 prefix2str(&p, buf, sizeof(buf)),
538 src_p.prefixlen ? " from " : "",
539 src_p.prefixlen
540 ? prefix2str(&src_p, buf2, sizeof(buf2))
541 : "",
45df4e96 542 vrf_id, table, metric, distance);
d62a17ae 543 }
544
545 afi_t afi = AFI_IP;
546 if (rtm->rtm_family == AF_INET6)
547 afi = AFI_IP6;
548
549 if (h->nlmsg_type == RTM_NEWROUTE) {
8795f904
DS
550 struct interface *ifp;
551 vrf_id_t nh_vrf_id = vrf_id;
552
fd36be7e
DL
553 if (!tb[RTA_MULTIPATH]) {
554 struct nexthop nh;
555 size_t sz = (afi == AFI_IP) ? 4 : 16;
556
557 memset(&nh, 0, sizeof(nh));
af760ec1
DS
558
559 if (bh_type == BLACKHOLE_UNSPEC) {
560 if (index && !gate)
561 nh.type = NEXTHOP_TYPE_IFINDEX;
562 else if (index && gate)
996c9314
LB
563 nh.type =
564 (afi == AFI_IP)
565 ? NEXTHOP_TYPE_IPV4_IFINDEX
566 : NEXTHOP_TYPE_IPV6_IFINDEX;
af760ec1 567 else if (!index && gate)
1dca2eaa
RW
568 nh.type = (afi == AFI_IP)
569 ? NEXTHOP_TYPE_IPV4
570 : NEXTHOP_TYPE_IPV6;
af760ec1
DS
571 else {
572 nh.type = NEXTHOP_TYPE_BLACKHOLE;
573 nh.bh_type = bh_type;
574 }
575 } else {
fd36be7e 576 nh.type = NEXTHOP_TYPE_BLACKHOLE;
e655a03c
DL
577 nh.bh_type = bh_type;
578 }
fd36be7e
DL
579 nh.ifindex = index;
580 if (prefsrc)
581 memcpy(&nh.src, prefsrc, sz);
582 if (gate)
583 memcpy(&nh.gate, gate, sz);
915902cb 584
8795f904 585 if (index) {
fac4d51e
PG
586 ifp = if_lookup_by_index_per_ns(
587 zebra_ns_lookup(ns_id),
588 index);
8795f904
DS
589 if (ifp)
590 nh_vrf_id = ifp->vrf_id;
591 }
4a7371e9 592 nh.vrf_id = nh_vrf_id;
8795f904 593
87da6a60
SW
594 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
595 && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
596 == LWTUNNEL_ENCAP_MPLS) {
597 num_labels =
598 parse_encap_mpls(tb[RTA_ENCAP], labels);
599 }
600
6cae47ed
DS
601 if (rtm->rtm_flags & RTNH_F_ONLINK)
602 SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
603
87da6a60
SW
604 if (num_labels)
605 nexthop_add_labels(&nh, ZEBRA_LSP_STATIC,
606 num_labels, labels);
607
4a7371e9 608 rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
1f610a1f 609 &src_p, &nh, table, metric, mtu, distance, tag);
fd36be7e 610 } else {
d62a17ae 611 /* This is a multipath route */
612
613 struct route_entry *re;
614 struct rtnexthop *rtnh =
615 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
616
617 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
618
619 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
915902cb 620 re->type = proto;
25715c7e 621 re->distance = distance;
d62a17ae 622 re->flags = flags;
623 re->metric = metric;
624 re->mtu = mtu;
625 re->vrf_id = vrf_id;
626 re->table = table;
627 re->nexthop_num = 0;
98572489 628 re->uptime = monotime(NULL);
4e40b6d6 629 re->tag = tag;
d62a17ae 630
631 for (;;) {
87da6a60 632 struct nexthop *nh = NULL;
c683bd44 633
d62a17ae 634 if (len < (int)sizeof(*rtnh)
635 || rtnh->rtnh_len > len)
636 break;
637
638 index = rtnh->rtnh_ifindex;
8795f904
DS
639 if (index) {
640 /*
641 * Yes we are looking this up
642 * for every nexthop and just
643 * using the last one looked
644 * up right now
645 */
fac4d51e
PG
646 ifp = if_lookup_by_index_per_ns(
647 zebra_ns_lookup(ns_id),
648 index);
8795f904 649 if (ifp)
4a7371e9
DS
650 nh_vrf_id = ifp->vrf_id;
651 else {
9df414fe 652 flog_warn(
e914ccbe 653 EC_ZEBRA_UNKNOWN_INTERFACE,
4a7371e9
DS
654 "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
655 __PRETTY_FUNCTION__,
656 index);
657 nh_vrf_id = VRF_DEFAULT;
658 }
659 } else
660 nh_vrf_id = vrf_id;
661
d62a17ae 662 gate = 0;
663 if (rtnh->rtnh_len > sizeof(*rtnh)) {
664 memset(tb, 0, sizeof(tb));
665 netlink_parse_rtattr(
666 tb, RTA_MAX, RTNH_DATA(rtnh),
667 rtnh->rtnh_len - sizeof(*rtnh));
668 if (tb[RTA_GATEWAY])
669 gate = RTA_DATA(
670 tb[RTA_GATEWAY]);
87da6a60
SW
671 if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
672 && *(uint16_t *)RTA_DATA(
673 tb[RTA_ENCAP_TYPE])
674 == LWTUNNEL_ENCAP_MPLS) {
675 num_labels = parse_encap_mpls(
676 tb[RTA_ENCAP], labels);
677 }
d62a17ae 678 }
679
680 if (gate) {
681 if (rtm->rtm_family == AF_INET) {
682 if (index)
87da6a60 683 nh = route_entry_nexthop_ipv4_ifindex_add(
d62a17ae 684 re, gate,
4a7371e9
DS
685 prefsrc, index,
686 nh_vrf_id);
d62a17ae 687 else
87da6a60 688 nh = route_entry_nexthop_ipv4_add(
d62a17ae 689 re, gate,
4a7371e9
DS
690 prefsrc,
691 nh_vrf_id);
d62a17ae 692 } else if (rtm->rtm_family
693 == AF_INET6) {
694 if (index)
87da6a60 695 nh = route_entry_nexthop_ipv6_ifindex_add(
4a7371e9
DS
696 re, gate, index,
697 nh_vrf_id);
d62a17ae 698 else
87da6a60 699 nh = route_entry_nexthop_ipv6_add(
4a7371e9
DS
700 re, gate,
701 nh_vrf_id);
d62a17ae 702 }
703 } else
87da6a60 704 nh = route_entry_nexthop_ifindex_add(
4a7371e9 705 re, index, nh_vrf_id);
d62a17ae 706
87da6a60
SW
707 if (nh && num_labels)
708 nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
709 num_labels, labels);
710
6cae47ed
DS
711 if (nh && (rtnh->rtnh_flags & RTNH_F_ONLINK))
712 SET_FLAG(nh->flags,
713 NEXTHOP_FLAG_ONLINK);
714
3c04071d
SW
715 if (rtnh->rtnh_len == 0)
716 break;
717
d62a17ae 718 len -= NLMSG_ALIGN(rtnh->rtnh_len);
719 rtnh = RTNH_NEXT(rtnh);
720 }
721
722 zserv_nexthop_num_warn(__func__,
723 (const struct prefix *)&p,
724 re->nexthop_num);
725 if (re->nexthop_num == 0)
726 XFREE(MTYPE_RE, re);
727 else
1f610a1f
CF
728 rib_add_multipath(afi, SAFI_UNICAST, &p,
729 &src_p, re);
d62a17ae 730 }
731 } else {
fd36be7e
DL
732 if (!tb[RTA_MULTIPATH]) {
733 struct nexthop nh;
734 size_t sz = (afi == AFI_IP) ? 4 : 16;
735
736 memset(&nh, 0, sizeof(nh));
8ba5bd58
RW
737 if (bh_type == BLACKHOLE_UNSPEC) {
738 if (index && !gate)
739 nh.type = NEXTHOP_TYPE_IFINDEX;
740 else if (index && gate)
741 nh.type =
742 (afi == AFI_IP)
743 ? NEXTHOP_TYPE_IPV4_IFINDEX
744 : NEXTHOP_TYPE_IPV6_IFINDEX;
745 else if (!index && gate)
746 nh.type = (afi == AFI_IP)
747 ? NEXTHOP_TYPE_IPV4
60466a63 748 : NEXTHOP_TYPE_IPV6;
8ba5bd58
RW
749 else {
750 nh.type = NEXTHOP_TYPE_BLACKHOLE;
751 nh.bh_type = BLACKHOLE_UNSPEC;
752 }
753 } else {
fd36be7e 754 nh.type = NEXTHOP_TYPE_BLACKHOLE;
8ba5bd58
RW
755 nh.bh_type = bh_type;
756 }
fd36be7e
DL
757 nh.ifindex = index;
758 if (gate)
759 memcpy(&nh.gate, gate, sz);
996c9314 760 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
51c4ed0a
DS
761 &p, &src_p, &nh, table, metric, distance,
762 true);
fd36be7e
DL
763 } else {
764 /* XXX: need to compare the entire list of nexthops
765 * here for NLM_F_APPEND stupidity */
996c9314 766 rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
51c4ed0a
DS
767 &p, &src_p, NULL, table, metric, distance,
768 true);
d62a17ae 769 }
770 }
771
772 return 0;
718e3744 773}
774
e3be0432
DS
775static struct mcast_route_data *mroute = NULL;
776
2414abd3 777static int netlink_route_change_read_multicast(struct nlmsghdr *h,
d62a17ae 778 ns_id_t ns_id, int startup)
565fdc75 779{
d62a17ae 780 int len;
781 struct rtmsg *rtm;
782 struct rtattr *tb[RTA_MAX + 1];
783 struct mcast_route_data *m;
784 struct mcast_route_data mr;
785 int iif = 0;
786 int count;
787 int oif[256];
788 int oif_count = 0;
789 char sbuf[40];
790 char gbuf[40];
791 char oif_list[256] = "\0";
78dd30b2 792 vrf_id_t vrf;
43b5cc5e 793 int table;
d62a17ae 794
795 if (mroute)
796 m = mroute;
797 else {
798 memset(&mr, 0, sizeof(mr));
799 m = &mr;
800 }
801
802 rtm = NLMSG_DATA(h);
803
804 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
805
806 memset(tb, 0, sizeof tb);
807 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
90d82769 808
43b5cc5e
DS
809 if (tb[RTA_TABLE])
810 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
811 else
812 table = rtm->rtm_table;
813
78dd30b2 814 vrf = vrf_lookup_by_table(table, ns_id);
43b5cc5e 815
d62a17ae 816 if (tb[RTA_IIF])
817 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
818
819 if (tb[RTA_SRC])
bd8b9272 820 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
d62a17ae 821
822 if (tb[RTA_DST])
bd8b9272 823 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
d62a17ae 824
62819462 825 if (tb[RTA_EXPIRES])
d62a17ae 826 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
827
828 if (tb[RTA_MULTIPATH]) {
829 struct rtnexthop *rtnh =
830 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
831
832 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
833 for (;;) {
834 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
835 break;
836
837 oif[oif_count] = rtnh->rtnh_ifindex;
838 oif_count++;
839
3c04071d
SW
840 if (rtnh->rtnh_len == 0)
841 break;
842
d62a17ae 843 len -= NLMSG_ALIGN(rtnh->rtnh_len);
844 rtnh = RTNH_NEXT(rtnh);
845 }
846 }
847
848 if (IS_ZEBRA_DEBUG_KERNEL) {
822c9af2
SW
849 struct interface *ifp = NULL;
850 struct zebra_vrf *zvrf = NULL;
851
0af35d90
RW
852 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
853 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
d62a17ae 854 for (count = 0; count < oif_count; count++) {
855 ifp = if_lookup_by_index(oif[count], vrf);
856 char temp[256];
857
5b4256ca
DS
858 sprintf(temp, "%s(%d) ", ifp ? ifp->name : "Unknown",
859 oif[count]);
eab4a5c2 860 strlcat(oif_list, temp, sizeof(oif_list));
d62a17ae 861 }
822c9af2 862 zvrf = zebra_vrf_lookup_by_id(vrf);
d62a17ae 863 ifp = if_lookup_by_index(iif, vrf);
822c9af2
SW
864 zlog_debug(
865 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s(%d) OIF: %s jiffies: %lld",
866 (zvrf ? zvrf->vrf->name : "Unknown"), vrf,
867 nl_msg_type_to_str(h->nlmsg_type), sbuf, gbuf,
868 ifp ? ifp->name : "Unknown", iif, oif_list,
869 m->lastused);
90d82769 870 }
d62a17ae 871 return 0;
565fdc75
DS
872}
873
2414abd3 874int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
565fdc75 875{
d62a17ae 876 int len;
d62a17ae 877 struct rtmsg *rtm;
878
879 rtm = NLMSG_DATA(h);
880
881 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
882 /* If this is not route add/delete message print warning. */
9165c5f5 883 zlog_debug("Kernel message: %s NS %u",
87b5d1b0 884 nl_msg_type_to_str(h->nlmsg_type), ns_id);
d62a17ae 885 return 0;
886 }
887
c25e2f1a
DS
888 if (!(rtm->rtm_family == AF_INET ||
889 rtm->rtm_family == AF_INET6 ||
890 rtm->rtm_family == RTNL_FAMILY_IPMR )) {
9df414fe 891 flog_warn(
e914ccbe 892 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
893 "Invalid address family: %u received from kernel route change: %s",
894 rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
895 return 0;
896 }
897
d62a17ae 898 /* Connected route. */
899 if (IS_ZEBRA_DEBUG_KERNEL)
78dd30b2 900 zlog_debug("%s %s %s proto %s NS %u",
d62a17ae 901 nl_msg_type_to_str(h->nlmsg_type),
902 nl_family_to_str(rtm->rtm_family),
903 nl_rttype_to_str(rtm->rtm_type),
78dd30b2 904 nl_rtproto_to_str(rtm->rtm_protocol), ns_id);
d62a17ae 905
d62a17ae 906
907 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
9bdf8618
DS
908 if (len < 0) {
909 zlog_err("%s: Message received from netlink is of a broken size: %d %zu",
910 __PRETTY_FUNCTION__,
911 h->nlmsg_len,
912 (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
d62a17ae 913 return -1;
9bdf8618 914 }
d62a17ae 915
e655a03c 916 if (rtm->rtm_type == RTN_MULTICAST)
2414abd3 917 netlink_route_change_read_multicast(h, ns_id, startup);
e655a03c 918 else
2414abd3 919 netlink_route_change_read_unicast(h, ns_id, startup);
d62a17ae 920 return 0;
565fdc75
DS
921}
922
289602d7 923/* Request for specific route information from the kernel */
d62a17ae 924static int netlink_request_route(struct zebra_ns *zns, int family, int type)
289602d7 925{
d62a17ae 926 struct {
927 struct nlmsghdr n;
928 struct rtmsg rtm;
929 } req;
930
931 /* Form the request, specifying filter (rtattr) if needed. */
932 memset(&req, 0, sizeof(req));
933 req.n.nlmsg_type = type;
718f9b0f 934 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 935 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
936 req.rtm.rtm_family = family;
937
938 return netlink_request(&zns->netlink_cmd, &req.n);
289602d7 939}
940
718e3744 941/* Routing table read function using netlink interface. Only called
942 bootstrap time. */
d62a17ae 943int netlink_route_read(struct zebra_ns *zns)
718e3744 944{
d62a17ae 945 int ret;
85a75f1e
MS
946 struct zebra_dplane_info dp_info;
947
948 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 949
950 /* Get IPv4 routing table. */
951 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
952 if (ret < 0)
953 return ret;
954 ret = netlink_parse_info(netlink_route_change_read_unicast,
85a75f1e 955 &zns->netlink_cmd, &dp_info, 0, 1);
d62a17ae 956 if (ret < 0)
957 return ret;
958
959 /* Get IPv6 routing table. */
960 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
961 if (ret < 0)
962 return ret;
963 ret = netlink_parse_info(netlink_route_change_read_unicast,
85a75f1e 964 &zns->netlink_cmd, &dp_info, 0, 1);
d62a17ae 965 if (ret < 0)
966 return ret;
967
968 return 0;
718e3744 969}
970
d7c0a89a
QY
971static void _netlink_route_nl_add_gateway_info(uint8_t route_family,
972 uint8_t gw_family,
d62a17ae 973 struct nlmsghdr *nlmsg,
974 size_t req_size, int bytelen,
81793ac1 975 const struct nexthop *nexthop)
40c7bdb0 976{
d62a17ae 977 if (route_family == AF_MPLS) {
978 struct gw_family_t gw_fam;
979
980 gw_fam.family = gw_family;
981 if (gw_family == AF_INET)
982 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
983 else
984 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
985 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
986 bytelen + 2);
987 } else {
988 if (gw_family == AF_INET)
989 addattr_l(nlmsg, req_size, RTA_GATEWAY,
990 &nexthop->gate.ipv4, bytelen);
991 else
992 addattr_l(nlmsg, req_size, RTA_GATEWAY,
993 &nexthop->gate.ipv6, bytelen);
994 }
40c7bdb0 995}
996
d7c0a89a
QY
997static void _netlink_route_rta_add_gateway_info(uint8_t route_family,
998 uint8_t gw_family,
d62a17ae 999 struct rtattr *rta,
1000 struct rtnexthop *rtnh,
1001 size_t req_size, int bytelen,
81793ac1 1002 const struct nexthop *nexthop)
40c7bdb0 1003{
d62a17ae 1004 if (route_family == AF_MPLS) {
1005 struct gw_family_t gw_fam;
1006
1007 gw_fam.family = gw_family;
1008 if (gw_family == AF_INET)
1009 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1010 else
1011 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1012 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
1013 bytelen + 2);
1014 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
1015 } else {
1016 if (gw_family == AF_INET)
1017 rta_addattr_l(rta, req_size, RTA_GATEWAY,
1018 &nexthop->gate.ipv4, bytelen);
1019 else
1020 rta_addattr_l(rta, req_size, RTA_GATEWAY,
1021 &nexthop->gate.ipv6, bytelen);
1022 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1023 }
40c7bdb0 1024}
1025
fa713d9e
CF
1026/* This function takes a nexthop as argument and adds
1027 * the appropriate netlink attributes to an existing
1028 * netlink message.
1029 *
1030 * @param routedesc: Human readable description of route type
1031 * (direct/recursive, single-/multipath)
1032 * @param bytelen: Length of addresses in bytes.
1033 * @param nexthop: Nexthop information
1034 * @param nlmsg: nlmsghdr structure to fill in.
1035 * @param req_size: The size allocated for the message.
1036 */
d62a17ae 1037static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
81793ac1 1038 const struct nexthop *nexthop,
d62a17ae 1039 struct nlmsghdr *nlmsg,
1040 struct rtmsg *rtmsg,
1041 size_t req_size, int cmd)
fa713d9e 1042{
8ecdb26e 1043 struct mpls_label_stack *nh_label;
d62a17ae 1044 mpls_lse_t out_lse[MPLS_MAX_LABELS];
fa712963 1045 int num_labels = 0;
9a62e84b 1046 char label_buf[256];
d62a17ae 1047
1048 /*
1049 * label_buf is *only* currently used within debugging.
1050 * As such when we assign it we are guarding it inside
1051 * a debug test. If you want to change this make sure
1052 * you fix this assumption
1053 */
1054 label_buf[0] = '\0';
d62a17ae 1055
fa712963 1056 assert(nexthop);
b43434ad 1057 char label_buf1[20];
d62a17ae 1058
b43434ad 1059 nh_label = nexthop->nh_label;
fa712963 1060
b43434ad
SW
1061 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1062 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1063 continue;
fa712963 1064
b43434ad
SW
1065 if (IS_ZEBRA_DEBUG_KERNEL) {
1066 if (!num_labels)
1067 sprintf(label_buf, "label %u",
1068 nh_label->label[i]);
1069 else {
1070 sprintf(label_buf1, "/%u", nh_label->label[i]);
1071 strlcat(label_buf, label_buf1,
1072 sizeof(label_buf));
d62a17ae 1073 }
1074 }
b43434ad
SW
1075
1076 out_lse[num_labels] =
1077 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1078 num_labels++;
fa712963
RW
1079 }
1080
1081 if (num_labels) {
1082 /* Set the BoS bit */
1083 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1084
1085 if (rtmsg->rtm_family == AF_MPLS)
1086 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
1087 num_labels * sizeof(mpls_lse_t));
1088 else {
1089 struct rtattr *nest;
d7c0a89a 1090 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
fa712963
RW
1091
1092 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE, &encap,
d7c0a89a 1093 sizeof(uint16_t));
fa712963
RW
1094 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
1095 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST, &out_lse,
1096 num_labels * sizeof(mpls_lse_t));
1097 addattr_nest_end(nlmsg, nest);
66d42727 1098 }
0aabccc0 1099 }
fa713d9e 1100
d62a17ae 1101 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1102 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1103
1104 if (rtmsg->rtm_family == AF_INET
1105 && (nexthop->type == NEXTHOP_TYPE_IPV6
1106 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1107 rtmsg->rtm_flags |= RTNH_F_ONLINK;
1108 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
1109 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1110
1111 if (nexthop->rmap_src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
1112 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1113 &nexthop->rmap_src.ipv4, bytelen);
1114 else if (nexthop->src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
1115 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1116 &nexthop->src.ipv4, bytelen);
1117
1118 if (IS_ZEBRA_DEBUG_KERNEL)
1119 zlog_debug(
1120 " 5549: _netlink_route_build_singlepath() (%s): "
7556c3fd 1121 "nexthop via %s %s if %u(%u)",
d62a17ae 1122 routedesc, ipv4_ll_buf, label_buf,
7556c3fd 1123 nexthop->ifindex, nexthop->vrf_id);
d62a17ae 1124 return;
0aabccc0
DD
1125 }
1126
d62a17ae 1127 if (nexthop->type == NEXTHOP_TYPE_IPV4
1128 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1129 /* Send deletes to the kernel without specifying the next-hop */
1130 if (cmd != RTM_DELROUTE)
1131 _netlink_route_nl_add_gateway_info(
1132 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1133 bytelen, nexthop);
1134
1135 if (cmd == RTM_NEWROUTE) {
1136 if (nexthop->rmap_src.ipv4.s_addr)
1137 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1138 &nexthop->rmap_src.ipv4, bytelen);
1139 else if (nexthop->src.ipv4.s_addr)
1140 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1141 &nexthop->src.ipv4, bytelen);
1142 }
1143
1144 if (IS_ZEBRA_DEBUG_KERNEL)
1145 zlog_debug(
1146 "netlink_route_multipath() (%s): "
7556c3fd 1147 "nexthop via %s %s if %u(%u)",
d62a17ae 1148 routedesc, inet_ntoa(nexthop->gate.ipv4),
7556c3fd 1149 label_buf, nexthop->ifindex, nexthop->vrf_id);
0aabccc0 1150 }
fa713d9e 1151
d62a17ae 1152 if (nexthop->type == NEXTHOP_TYPE_IPV6
1153 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1154 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1155 nlmsg, req_size, bytelen,
1156 nexthop);
1157
1158 if (cmd == RTM_NEWROUTE) {
1159 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1160 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1161 &nexthop->rmap_src.ipv6, bytelen);
1162 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1163 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1164 &nexthop->src.ipv6, bytelen);
1165 }
fa713d9e 1166
d62a17ae 1167 if (IS_ZEBRA_DEBUG_KERNEL)
1168 zlog_debug(
1169 "netlink_route_multipath() (%s): "
7556c3fd 1170 "nexthop via %s %s if %u(%u)",
d62a17ae 1171 routedesc, inet6_ntoa(nexthop->gate.ipv6),
7556c3fd 1172 label_buf, nexthop->ifindex, nexthop->vrf_id);
d62a17ae 1173 }
5e210522
DS
1174
1175 /*
1176 * We have the ifindex so we should always send it
1177 * This is especially useful if we are doing route
1178 * leaking.
1179 */
1180 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
d62a17ae 1181 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1182
275565fb 1183 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
d62a17ae 1184 if (cmd == RTM_NEWROUTE) {
1185 if (nexthop->rmap_src.ipv4.s_addr)
1186 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1187 &nexthop->rmap_src.ipv4, bytelen);
1188 else if (nexthop->src.ipv4.s_addr)
1189 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1190 &nexthop->src.ipv4, bytelen);
1191 }
fa713d9e 1192
d62a17ae 1193 if (IS_ZEBRA_DEBUG_KERNEL)
1194 zlog_debug(
1195 "netlink_route_multipath() (%s): "
7556c3fd
DS
1196 "nexthop via if %u(%u)",
1197 routedesc, nexthop->ifindex, nexthop->vrf_id);
0aabccc0 1198 }
fa713d9e
CF
1199}
1200
1201/* This function takes a nexthop as argument and
1202 * appends to the given rtattr/rtnexthop pair the
1203 * representation of the nexthop. If the nexthop
1204 * defines a preferred source, the src parameter
1205 * will be modified to point to that src, otherwise
1206 * it will be kept unmodified.
1207 *
1208 * @param routedesc: Human readable description of route type
1209 * (direct/recursive, single-/multipath)
1210 * @param bytelen: Length of addresses in bytes.
1211 * @param nexthop: Nexthop information
1212 * @param rta: rtnetlink attribute structure
1213 * @param rtnh: pointer to an rtnetlink nexthop structure
1214 * @param src: pointer pointing to a location where
1215 * the prefsrc should be stored.
1216 */
d62a17ae 1217static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
81793ac1 1218 const struct nexthop *nexthop,
d62a17ae 1219 struct rtattr *rta,
1220 struct rtnexthop *rtnh,
1221 struct rtmsg *rtmsg,
81793ac1 1222 const union g_addr **src)
fa713d9e 1223{
8ecdb26e 1224 struct mpls_label_stack *nh_label;
d62a17ae 1225 mpls_lse_t out_lse[MPLS_MAX_LABELS];
fa712963 1226 int num_labels = 0;
9a62e84b 1227 char label_buf[256];
d62a17ae 1228
1229 rtnh->rtnh_len = sizeof(*rtnh);
1230 rtnh->rtnh_flags = 0;
1231 rtnh->rtnh_hops = 0;
1232 rta->rta_len += rtnh->rtnh_len;
1233
1234 /*
1235 * label_buf is *only* currently used within debugging.
1236 * As such when we assign it we are guarding it inside
1237 * a debug test. If you want to change this make sure
1238 * you fix this assumption
1239 */
1240 label_buf[0] = '\0';
d62a17ae 1241
fa712963 1242 assert(nexthop);
b43434ad 1243 char label_buf1[20];
d62a17ae 1244
b43434ad 1245 nh_label = nexthop->nh_label;
fa712963 1246
b43434ad
SW
1247 for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1248 if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1249 continue;
fa712963 1250
b43434ad
SW
1251 if (IS_ZEBRA_DEBUG_KERNEL) {
1252 if (!num_labels)
1253 sprintf(label_buf, "label %u",
1254 nh_label->label[i]);
1255 else {
1256 sprintf(label_buf1, "/%u", nh_label->label[i]);
1257 strlcat(label_buf, label_buf1,
1258 sizeof(label_buf));
d62a17ae 1259 }
1260 }
b43434ad
SW
1261
1262 out_lse[num_labels] =
1263 mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1264 num_labels++;
fa712963
RW
1265 }
1266
1267 if (num_labels) {
1268 /* Set the BoS bit */
1269 out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1270
1271 if (rtmsg->rtm_family == AF_MPLS) {
1272 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1273 &out_lse,
1274 num_labels * sizeof(mpls_lse_t));
1275 rtnh->rtnh_len +=
1276 RTA_LENGTH(num_labels * sizeof(mpls_lse_t));
1277 } else {
1278 struct rtattr *nest;
d7c0a89a 1279 uint16_t encap = LWTUNNEL_ENCAP_MPLS;
fa712963
RW
1280 int len = rta->rta_len;
1281
1282 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_ENCAP_TYPE,
d7c0a89a 1283 &encap, sizeof(uint16_t));
fa712963
RW
1284 nest = rta_nest(rta, NL_PKT_BUF_SIZE, RTA_ENCAP);
1285 rta_addattr_l(rta, NL_PKT_BUF_SIZE, MPLS_IPTUNNEL_DST,
1286 &out_lse,
1287 num_labels * sizeof(mpls_lse_t));
1288 rta_nest_end(rta, nest);
1289 rtnh->rtnh_len += rta->rta_len - len;
66d42727 1290 }
d62a17ae 1291 }
1292
1293 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1294 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1295
1296 if (rtmsg->rtm_family == AF_INET
1297 && (nexthop->type == NEXTHOP_TYPE_IPV6
1298 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1299 bytelen = 4;
1300 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1301 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1302 bytelen);
1303 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1304 rtnh->rtnh_ifindex = nexthop->ifindex;
1305
1306 if (nexthop->rmap_src.ipv4.s_addr)
1307 *src = &nexthop->rmap_src;
1308 else if (nexthop->src.ipv4.s_addr)
1309 *src = &nexthop->src;
1310
1311 if (IS_ZEBRA_DEBUG_KERNEL)
1312 zlog_debug(
1313 " 5549: netlink_route_build_multipath() (%s): "
1314 "nexthop via %s %s if %u",
1315 routedesc, ipv4_ll_buf, label_buf,
1316 nexthop->ifindex);
1317 return;
1318 }
1319
1320 if (nexthop->type == NEXTHOP_TYPE_IPV4
1321 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1322 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1323 rta, rtnh, NL_PKT_BUF_SIZE,
1324 bytelen, nexthop);
1325 if (nexthop->rmap_src.ipv4.s_addr)
1326 *src = &nexthop->rmap_src;
1327 else if (nexthop->src.ipv4.s_addr)
1328 *src = &nexthop->src;
1329
1330 if (IS_ZEBRA_DEBUG_KERNEL)
1331 zlog_debug(
1332 "netlink_route_multipath() (%s): "
1333 "nexthop via %s %s if %u",
1334 routedesc, inet_ntoa(nexthop->gate.ipv4),
1335 label_buf, nexthop->ifindex);
1336 }
1337 if (nexthop->type == NEXTHOP_TYPE_IPV6
1338 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1339 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1340 rta, rtnh, NL_PKT_BUF_SIZE,
1341 bytelen, nexthop);
1342
1343 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1344 *src = &nexthop->rmap_src;
1345 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1346 *src = &nexthop->src;
1347
1348 if (IS_ZEBRA_DEBUG_KERNEL)
1349 zlog_debug(
1350 "netlink_route_multipath() (%s): "
1351 "nexthop via %s %s if %u",
1352 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1353 label_buf, nexthop->ifindex);
1354 }
5e210522
DS
1355
1356 /*
1357 * We have figured out the ifindex so we should always send it
1358 * This is especially useful if we are doing route
1359 * leaking.
1360 */
1361 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1362 rtnh->rtnh_ifindex = nexthop->ifindex;
1363
d62a17ae 1364 /* ifindex */
275565fb 1365 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
d62a17ae 1366 if (nexthop->rmap_src.ipv4.s_addr)
1367 *src = &nexthop->rmap_src;
1368 else if (nexthop->src.ipv4.s_addr)
1369 *src = &nexthop->src;
1370
1371 if (IS_ZEBRA_DEBUG_KERNEL)
1372 zlog_debug(
1373 "netlink_route_multipath() (%s): "
1374 "nexthop via if %u",
1375 routedesc, nexthop->ifindex);
d62a17ae 1376 }
fa713d9e
CF
1377}
1378
d62a17ae 1379static inline void _netlink_mpls_build_singlepath(const char *routedesc,
81793ac1 1380 const zebra_nhlfe_t *nhlfe,
d62a17ae 1381 struct nlmsghdr *nlmsg,
1382 struct rtmsg *rtmsg,
1383 size_t req_size, int cmd)
40c7bdb0 1384{
d62a17ae 1385 int bytelen;
d7c0a89a 1386 uint8_t family;
40c7bdb0 1387
d62a17ae 1388 family = NHLFE_FAMILY(nhlfe);
1389 bytelen = (family == AF_INET ? 4 : 16);
1390 _netlink_route_build_singlepath(routedesc, bytelen, nhlfe->nexthop,
1391 nlmsg, rtmsg, req_size, cmd);
40c7bdb0 1392}
1393
1394
1395static inline void
81793ac1 1396_netlink_mpls_build_multipath(const char *routedesc, const zebra_nhlfe_t *nhlfe,
d62a17ae 1397 struct rtattr *rta, struct rtnexthop *rtnh,
81793ac1 1398 struct rtmsg *rtmsg, const union g_addr **src)
40c7bdb0 1399{
d62a17ae 1400 int bytelen;
d7c0a89a 1401 uint8_t family;
40c7bdb0 1402
d62a17ae 1403 family = NHLFE_FAMILY(nhlfe);
1404 bytelen = (family == AF_INET ? 4 : 16);
1405 _netlink_route_build_multipath(routedesc, bytelen, nhlfe->nexthop, rta,
1406 rtnh, rtmsg, src);
40c7bdb0 1407}
1408
1409
fa713d9e
CF
1410/* Log debug information for netlink_route_multipath
1411 * if debug logging is enabled.
1412 *
1413 * @param cmd: Netlink command which is to be processed
1414 * @param p: Prefix for which the change is due
fa713d9e 1415 * @param family: Address family which the change concerns
45df4e96
DS
1416 * @param zvrf: The vrf we are in
1417 * @param tableid: The table we are working on
fa713d9e 1418 */
86391e56
MS
1419static void _netlink_route_debug(int cmd, const struct prefix *p,
1420 int family, vrf_id_t vrfid,
7556c3fd 1421 uint32_t tableid)
fa713d9e 1422{
d62a17ae 1423 if (IS_ZEBRA_DEBUG_KERNEL) {
1424 char buf[PREFIX_STRLEN];
1425 zlog_debug(
45df4e96
DS
1426 "netlink_route_multipath(): %s %s vrf %u(%u)",
1427 nl_msg_type_to_str(cmd),
1428 prefix2str(p, buf, sizeof(buf)),
86391e56 1429 vrfid, tableid);
d62a17ae 1430 }
1431}
1432
d7c0a89a 1433static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
40c7bdb0 1434{
d62a17ae 1435 if (IS_ZEBRA_DEBUG_KERNEL)
1436 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1437 nl_msg_type_to_str(cmd), label);
fa713d9e
CF
1438}
1439
d62a17ae 1440static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
5895d33f 1441 int llalen, ns_id_t ns_id)
5c610faf 1442{
d62a17ae 1443 struct {
1444 struct nlmsghdr n;
1445 struct ndmsg ndm;
1446 char buf[256];
1447 } req;
5c610faf 1448
5895d33f 1449 struct zebra_ns *zns = zebra_ns_lookup(ns_id);
8f7d9fc0 1450
5605ecfc 1451 memset(&req, 0, sizeof(req));
5c610faf 1452
d62a17ae 1453 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1454 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1455 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1456 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
a55ba23f 1457
d62a17ae 1458 req.ndm.ndm_family = AF_INET;
1459 req.ndm.ndm_state = NUD_PERMANENT;
1460 req.ndm.ndm_ifindex = ifindex;
1461 req.ndm.ndm_type = RTN_UNICAST;
5c610faf 1462
d62a17ae 1463 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1464 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
5c610faf 1465
d62a17ae 1466 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1467 0);
5c610faf
DS
1468}
1469
7cdb1a84
MS
1470/*
1471 * Routing table change via netlink interface, using a dataplane context object
1472 */
25779064 1473static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx)
7cdb1a84
MS
1474{
1475 int bytelen;
7cdb1a84
MS
1476 struct nexthop *nexthop = NULL;
1477 unsigned int nexthop_num;
1478 int family;
1479 const char *routedesc;
1480 int setsrc = 0;
1481 union g_addr src;
1482 const struct prefix *p, *src_p;
1483 uint32_t table_id;
1484
1485 struct {
1486 struct nlmsghdr n;
1487 struct rtmsg r;
1488 char buf[NL_PKT_BUF_SIZE];
1489 } req;
1490
1491 p = dplane_ctx_get_dest(ctx);
1492 src_p = dplane_ctx_get_src(ctx);
1493
1494 family = PREFIX_FAMILY(p);
1495
5709131c 1496 memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE);
7cdb1a84
MS
1497
1498 bytelen = (family == AF_INET ? 4 : 16);
1499
1500 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1501 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1502
334734a8
DS
1503 if ((cmd == RTM_NEWROUTE) &&
1504 ((p->family == AF_INET) || v6_rr_semantics))
1505 req.n.nlmsg_flags |= NLM_F_REPLACE;
7cdb1a84
MS
1506
1507 req.n.nlmsg_type = cmd;
1508
1509 req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
1510
1511 req.r.rtm_family = family;
1512 req.r.rtm_dst_len = p->prefixlen;
1513 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1514 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1515
5709131c 1516 if (cmd == RTM_DELROUTE)
7cdb1a84 1517 req.r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
5709131c 1518 else
7cdb1a84 1519 req.r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
7cdb1a84
MS
1520
1521 /*
1522 * blackhole routes are not RTN_UNICAST, they are
1523 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
1524 * so setting this value as a RTN_UNICAST would
1525 * cause the route lookup of just the prefix
1526 * to fail. So no need to specify this for
1527 * the RTM_DELROUTE case
1528 */
1529 if (cmd != RTM_DELROUTE)
1530 req.r.rtm_type = RTN_UNICAST;
1531
5709131c 1532 addattr_l(&req.n, sizeof(req), RTA_DST, &p->u.prefix, bytelen);
7cdb1a84 1533 if (src_p)
5709131c 1534 addattr_l(&req.n, sizeof(req), RTA_SRC, &src_p->u.prefix,
7cdb1a84
MS
1535 bytelen);
1536
1537 /* Metric. */
1538 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1539 * used
1540 * either by the kernel or by zebra. Its purely for calculating best
1541 * path(s)
1542 * by the routing protocol and for communicating with protocol peers.
1543 */
5709131c 1544 addattr32(&req.n, sizeof(req), RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
7cdb1a84
MS
1545
1546#if defined(SUPPORT_REALMS)
1547 {
1548 route_tag_t tag;
1549
5709131c 1550 if (cmd == RTM_DELROUTE)
7cdb1a84 1551 tag = dplane_ctx_get_old_tag(ctx);
5709131c 1552 else
7cdb1a84 1553 tag = dplane_ctx_get_tag(ctx);
7cdb1a84
MS
1554
1555 if (tag > 0 && tag <= 255)
5709131c 1556 addattr32(&req.n, sizeof(req), RTA_FLOW, tag);
7cdb1a84
MS
1557 }
1558#endif
1559 /* Table corresponding to this route. */
1560 table_id = dplane_ctx_get_table(ctx);
1561 if (table_id < 256)
1562 req.r.rtm_table = table_id;
1563 else {
1564 req.r.rtm_table = RT_TABLE_UNSPEC;
5709131c 1565 addattr32(&req.n, sizeof(req), RTA_TABLE, table_id);
7cdb1a84
MS
1566 }
1567
1568 _netlink_route_debug(cmd, p, family, dplane_ctx_get_vrf(ctx), table_id);
1569
1570 /*
1571 * If we are not updating the route and we have received
1572 * a route delete, then all we need to fill in is the
1573 * prefix information to tell the kernel to schwack
1574 * it.
1575 */
1576 if (cmd == RTM_DELROUTE)
1577 goto skip;
1578
1579 if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
1580 char buf[NL_PKT_BUF_SIZE];
1581 struct rtattr *rta = (void *)buf;
1582 uint32_t mtu = dplane_ctx_get_mtu(ctx);
1583 uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
5709131c 1584
7cdb1a84
MS
1585 if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
1586 mtu = nexthop_mtu;
1587 rta->rta_type = RTA_METRICS;
1588 rta->rta_len = RTA_LENGTH(0);
5709131c
MS
1589 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1590 RTAX_MTU, &mtu, sizeof(mtu));
7cdb1a84
MS
1591 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1592 RTA_PAYLOAD(rta));
1593 }
1594
1595 /* Count overall nexthops so we can decide whether to use singlepath
5709131c
MS
1596 * or multipath case.
1597 */
7cdb1a84
MS
1598 nexthop_num = 0;
1599 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1600 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1601 continue;
1602 if (cmd == RTM_NEWROUTE && !NEXTHOP_IS_ACTIVE(nexthop->flags))
1603 continue;
1604
1605 nexthop_num++;
1606 }
1607
1608 /* Singlepath case. */
220f0f42 1609 if (nexthop_num == 1) {
7cdb1a84
MS
1610 nexthop_num = 0;
1611 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1612 /*
1613 * So we want to cover 2 types of blackhole
1614 * routes here:
1615 * 1) A normal blackhole route( ala from a static
1616 * install.
1617 * 2) A recursively resolved blackhole route
1618 */
1619 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1620 switch (nexthop->bh_type) {
1621 case BLACKHOLE_ADMINPROHIB:
1622 req.r.rtm_type = RTN_PROHIBIT;
1623 break;
1624 case BLACKHOLE_REJECT:
1625 req.r.rtm_type = RTN_UNREACHABLE;
1626 break;
1627 default:
1628 req.r.rtm_type = RTN_BLACKHOLE;
1629 break;
1630 }
1631 goto skip;
1632 }
1633 if (CHECK_FLAG(nexthop->flags,
1634 NEXTHOP_FLAG_RECURSIVE)) {
5709131c
MS
1635
1636 if (setsrc)
1637 continue;
1638
1639 if (family == AF_INET) {
1640 if (nexthop->rmap_src.ipv4.s_addr
1641 != 0) {
1642 src.ipv4 =
1643 nexthop->rmap_src.ipv4;
1644 setsrc = 1;
1645 } else if (nexthop->src.ipv4.s_addr
1646 != 0) {
1647 src.ipv4 =
1648 nexthop->src.ipv4;
1649 setsrc = 1;
1650 }
1651 } else if (family == AF_INET6) {
1652 if (!IN6_IS_ADDR_UNSPECIFIED(
1653 &nexthop->rmap_src.ipv6)) {
1654 src.ipv6 =
1655 nexthop->rmap_src.ipv6;
1656 setsrc = 1;
1657 } else if (
1658 !IN6_IS_ADDR_UNSPECIFIED(
1659 &nexthop->src.ipv6)) {
1660 src.ipv6 =
1661 nexthop->src.ipv6;
1662 setsrc = 1;
7cdb1a84
MS
1663 }
1664 }
f183e380 1665 continue;
7cdb1a84
MS
1666 }
1667
1668 if ((cmd == RTM_NEWROUTE
1669 && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
1670 routedesc = nexthop->rparent
1671 ? "recursive, single-path"
1672 : "single-path";
1673
1674 _netlink_route_build_singlepath(
1675 routedesc, bytelen, nexthop, &req.n,
5709131c 1676 &req.r, sizeof(req), cmd);
7cdb1a84
MS
1677 nexthop_num++;
1678 break;
1679 }
1680 }
1681 if (setsrc && (cmd == RTM_NEWROUTE)) {
1682 if (family == AF_INET)
5709131c 1683 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1684 &src.ipv4, bytelen);
1685 else if (family == AF_INET6)
5709131c 1686 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1687 &src.ipv6, bytelen);
1688 }
1689 } else { /* Multipath case */
1690 char buf[NL_PKT_BUF_SIZE];
1691 struct rtattr *rta = (void *)buf;
1692 struct rtnexthop *rtnh;
81793ac1 1693 const union g_addr *src1 = NULL;
7cdb1a84
MS
1694
1695 rta->rta_type = RTA_MULTIPATH;
1696 rta->rta_len = RTA_LENGTH(0);
1697 rtnh = RTA_DATA(rta);
1698
1699 nexthop_num = 0;
1700 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
7cdb1a84
MS
1701 if (CHECK_FLAG(nexthop->flags,
1702 NEXTHOP_FLAG_RECURSIVE)) {
1703 /* This only works for IPv4 now */
5709131c
MS
1704 if (setsrc)
1705 continue;
1706
1707 if (family == AF_INET) {
1708 if (nexthop->rmap_src.ipv4.s_addr
1709 != 0) {
1710 src.ipv4 =
1711 nexthop->rmap_src.ipv4;
1712 setsrc = 1;
1713 } else if (nexthop->src.ipv4.s_addr
1714 != 0) {
1715 src.ipv4 =
1716 nexthop->src.ipv4;
1717 setsrc = 1;
1718 }
1719 } else if (family == AF_INET6) {
1720 if (!IN6_IS_ADDR_UNSPECIFIED(
1721 &nexthop->rmap_src.ipv6)) {
1722 src.ipv6 =
1723 nexthop->rmap_src.ipv6;
1724 setsrc = 1;
1725 } else if (
1726 !IN6_IS_ADDR_UNSPECIFIED(
1727 &nexthop->src.ipv6)) {
1728 src.ipv6 =
1729 nexthop->src.ipv6;
1730 setsrc = 1;
7cdb1a84
MS
1731 }
1732 }
78e54ded
MS
1733
1734 continue;
7cdb1a84
MS
1735 }
1736
1737 if ((cmd == RTM_NEWROUTE
1738 && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
1739 routedesc = nexthop->rparent
1740 ? "recursive, multipath"
1741 : "multipath";
1742 nexthop_num++;
1743
1744 _netlink_route_build_multipath(
1745 routedesc, bytelen, nexthop, rta, rtnh,
1746 &req.r, &src1);
1747 rtnh = RTNH_NEXT(rtnh);
1748
1749 if (!setsrc && src1) {
1750 if (family == AF_INET)
1751 src.ipv4 = src1->ipv4;
1752 else if (family == AF_INET6)
1753 src.ipv6 = src1->ipv6;
1754
1755 setsrc = 1;
1756 }
1757 }
1758 }
1759 if (setsrc && (cmd == RTM_NEWROUTE)) {
1760 if (family == AF_INET)
5709131c 1761 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1762 &src.ipv4, bytelen);
1763 else if (family == AF_INET6)
5709131c 1764 addattr_l(&req.n, sizeof(req), RTA_PREFSRC,
7cdb1a84
MS
1765 &src.ipv6, bytelen);
1766 if (IS_ZEBRA_DEBUG_KERNEL)
1767 zlog_debug("Setting source");
1768 }
1769
1770 if (rta->rta_len > RTA_LENGTH(0))
1771 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1772 RTA_DATA(rta), RTA_PAYLOAD(rta));
1773 }
1774
1775 /* If there is no useful nexthop then return. */
1776 if (nexthop_num == 0) {
1777 if (IS_ZEBRA_DEBUG_KERNEL)
1778 zlog_debug(
1779 "netlink_route_multipath(): No useful nexthop.");
1780 return 0;
1781 }
1782
1783skip:
7cdb1a84
MS
1784 /* Talk to netlink socket. */
1785 return netlink_talk_info(netlink_talk_filter, &req.n,
1786 dplane_ctx_get_ns(ctx), 0);
1787}
1788
43b5cc5e 1789int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
e3be0432 1790{
5523c156 1791 uint32_t actual_table;
d62a17ae 1792 int suc = 0;
1793 struct mcast_route_data *mr = (struct mcast_route_data *)in;
bd8b9272
DS
1794 struct {
1795 struct nlmsghdr n;
1796 struct ndmsg ndm;
1797 char buf[256];
1798 } req;
e3be0432 1799
d62a17ae 1800 mroute = mr;
5895d33f 1801 struct zebra_ns *zns;
bd8b9272 1802
009f8ad5 1803 zns = zvrf->zns;
5605ecfc 1804 memset(&req, 0, sizeof(req));
bd8b9272
DS
1805
1806 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1807 req.n.nlmsg_flags = NLM_F_REQUEST;
1808 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1809
1810 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1811 req.n.nlmsg_type = RTM_GETROUTE;
1812
1813 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1814 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1815 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1816 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
5523c156
DS
1817 /*
1818 * What?
1819 *
1820 * So during the namespace cleanup we started storing
1821 * the zvrf table_id for the default table as RT_TABLE_MAIN
1822 * which is what the normal routing table for ip routing is.
1823 * This change caused this to break our lookups of sg data
1824 * because prior to this change the zvrf->table_id was 0
1825 * and when the pim multicast kernel code saw a 0,
1826 * it was auto-translated to RT_TABLE_DEFAULT. But since
1827 * we are now passing in RT_TABLE_MAIN there is no auto-translation
1828 * and the kernel goes screw you and the delicious cookies you
1829 * are trying to give me. So now we have this little hack.
1830 */
1831 actual_table = (zvrf->table_id == RT_TABLE_MAIN) ? RT_TABLE_DEFAULT :
1832 zvrf->table_id;
1833 addattr_l(&req.n, sizeof(req), RTA_TABLE, &actual_table, 4);
e3be0432 1834
bd8b9272
DS
1835 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1836 &zns->netlink_cmd, zns, 0);
e3be0432 1837
bd8b9272 1838 mroute = NULL;
d62a17ae 1839 return suc;
e3be0432
DS
1840}
1841
7cdb1a84
MS
1842/*
1843 * Update or delete a prefix from the kernel,
1844 * using info from a dataplane context.
1845 */
25779064 1846enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
7cdb1a84
MS
1847{
1848 int cmd, ret;
1849 const struct prefix *p = dplane_ctx_get_dest(ctx);
f183e380 1850 struct nexthop *nexthop;
7cdb1a84
MS
1851
1852 if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
1853 cmd = RTM_DELROUTE;
1854 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
1855 cmd = RTM_NEWROUTE;
1856 } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
1857
1858 if (p->family == AF_INET || v6_rr_semantics) {
1859 /* Single 'replace' operation */
1860 cmd = RTM_NEWROUTE;
fe5f21af
DS
1861
1862 /*
1863 * With route replace semantics in place
1864 * for v4 routes and the new route is a system
1865 * route we do not install anything.
1866 * The problem here is that the new system
1867 * route should cause us to withdraw from
1868 * the kernel the old non-system route
1869 */
1870 if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)) &&
1871 !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
1872 (void)netlink_route_multipath(RTM_DELROUTE,
1873 ctx);
7cdb1a84
MS
1874 } else {
1875 /*
1876 * So v6 route replace semantics are not in
1877 * the kernel at this point as I understand it.
1878 * so let's do a delete then an add.
1879 * In the future once v6 route replace semantics
1880 * are in we can figure out what to do here to
1881 * allow working with old and new kernels.
1882 *
1883 * I'm also intentionally ignoring the failure case
1884 * of the route delete. If that happens yeah we're
1885 * screwed.
1886 */
3cdba47a
DS
1887 if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
1888 (void)netlink_route_multipath(RTM_DELROUTE,
1889 ctx);
7cdb1a84
MS
1890 cmd = RTM_NEWROUTE;
1891 }
1892
1893 } else {
1894 return ZEBRA_DPLANE_REQUEST_FAILURE;
1895 }
1896
3cdba47a
DS
1897 if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
1898 ret = netlink_route_multipath(cmd, ctx);
1899 else
1900 ret = 0;
f183e380
MS
1901 if ((cmd == RTM_NEWROUTE) && (ret == 0)) {
1902 /* Update installed nexthops to signal which have been
1903 * installed.
1904 */
1905 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
1906 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1907 continue;
1908
1909 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
1910 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
f183e380
MS
1911 }
1912 }
1913 }
7cdb1a84
MS
1914
1915 return (ret == 0 ?
1916 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
1917}
1918
d62a17ae 1919int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
5895d33f 1920 int llalen, ns_id_t ns_id)
6b8a5694 1921{
d62a17ae 1922 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
5895d33f 1923 addr, lla, llalen, ns_id);
6b8a5694 1924}
718e3744 1925
13d60d35 1926/*
1927 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
1928 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
1929 */
0bbd4ff4
MS
1930static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx,
1931 int cmd)
13d60d35 1932{
d62a17ae 1933 struct {
1934 struct nlmsghdr n;
1935 struct ndmsg ndm;
1936 char buf[256];
1937 } req;
d7c0a89a 1938 uint8_t dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
0bbd4ff4 1939 const struct ipaddr *addr;
d62a17ae 1940
5605ecfc 1941 memset(&req, 0, sizeof(req));
d62a17ae 1942
1943 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1944 req.n.nlmsg_flags = NLM_F_REQUEST;
1945 if (cmd == RTM_NEWNEIGH)
1946 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
1947 req.n.nlmsg_type = cmd;
1948 req.ndm.ndm_family = PF_BRIDGE;
1949 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
1950 req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
1951
1952
1953 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
0bbd4ff4 1954 req.ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
13d60d35 1955
0bbd4ff4 1956 addr = dplane_ctx_neigh_get_ipaddr(ctx);
13d60d35 1957
0bbd4ff4 1958 addattr_l(&req.n, sizeof(req), NDA_DST, &(addr->ipaddr_v4), 4);
13d60d35 1959
0bbd4ff4
MS
1960 return netlink_talk_info(netlink_talk_filter, &req.n,
1961 dplane_ctx_get_ns(ctx), 0);
13d60d35 1962}
1963
#ifndef NDA_RTA
/* Address of the first rtattr following the aligned ndmsg header at r. */
#define NDA_RTA(r)                                                             \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
1968
2414abd3 1969static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 1970{
d62a17ae 1971 struct ndmsg *ndm;
1972 struct interface *ifp;
1973 struct zebra_if *zif;
d62a17ae 1974 struct rtattr *tb[NDA_MAX + 1];
1975 struct interface *br_if;
1976 struct ethaddr mac;
1977 vlanid_t vid = 0;
1978 struct prefix vtep_ip;
1979 int vid_present = 0, dst_present = 0;
1980 char buf[ETHER_ADDR_STRLEN];
1981 char vid_buf[20];
1982 char dst_buf[30];
a37f4598 1983 bool sticky;
d62a17ae 1984
1985 ndm = NLMSG_DATA(h);
1986
2853fed6 1987 /* We only process macfdb notifications if EVPN is enabled */
1988 if (!is_evpn_enabled())
1989 return 0;
1990
d62a17ae 1991 /* The interface should exist. */
5895d33f 1992 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
d62a17ae 1993 ndm->ndm_ifindex);
28bd0652
DS
1994 if (!ifp || !ifp->info) {
1995 if (IS_ZEBRA_DEBUG_KERNEL)
1996 zlog_debug("\t%s without associated interface: %u",
1997 __PRETTY_FUNCTION__, ndm->ndm_ifindex);
d62a17ae 1998 return 0;
28bd0652 1999 }
d62a17ae 2000
2001 /* The interface should be something we're interested in. */
28bd0652
DS
2002 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) {
2003 if (IS_ZEBRA_DEBUG_KERNEL)
2004 zlog_debug("\t%s Not interested in %s, not a slave",
2005 __PRETTY_FUNCTION__, ifp->name);
d62a17ae 2006 return 0;
28bd0652 2007 }
d62a17ae 2008
2009 /* Drop "permanent" entries. */
28bd0652
DS
2010 if (ndm->ndm_state & NUD_PERMANENT) {
2011 if (IS_ZEBRA_DEBUG_KERNEL)
2012 zlog_debug("\t%s Entry is PERMANENT, dropping",
2013 __PRETTY_FUNCTION__);
d62a17ae 2014 return 0;
28bd0652 2015 }
d62a17ae 2016
2017 zif = (struct zebra_if *)ifp->info;
2018 if ((br_if = zif->brslave_info.br_if) == NULL) {
28bd0652
DS
2019 if (IS_ZEBRA_DEBUG_KERNEL)
2020 zlog_debug(
2021 "%s family %s IF %s(%u) brIF %u - no bridge master",
2022 nl_msg_type_to_str(h->nlmsg_type),
2023 nl_family_to_str(ndm->ndm_family), ifp->name,
2024 ndm->ndm_ifindex,
2025 zif->brslave_info.bridge_ifindex);
d62a17ae 2026 return 0;
2027 }
2028
2029 /* Parse attributes and extract fields of interest. */
2030 memset(tb, 0, sizeof tb);
2031 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2032
2033 if (!tb[NDA_LLADDR]) {
28bd0652
DS
2034 if (IS_ZEBRA_DEBUG_KERNEL)
2035 zlog_debug("%s family %s IF %s(%u) brIF %u - no LLADDR",
2036 nl_msg_type_to_str(h->nlmsg_type),
2037 nl_family_to_str(ndm->ndm_family), ifp->name,
2038 ndm->ndm_ifindex,
2039 zif->brslave_info.bridge_ifindex);
d62a17ae 2040 return 0;
2041 }
2042
ff8b7eb8 2043 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
2044 if (IS_ZEBRA_DEBUG_KERNEL)
2045 zlog_debug(
2046 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
2047 nl_msg_type_to_str(h->nlmsg_type),
2048 nl_family_to_str(ndm->ndm_family), ifp->name,
2049 ndm->ndm_ifindex,
2050 zif->brslave_info.bridge_ifindex,
2051 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
d62a17ae 2052 return 0;
2053 }
2054
ff8b7eb8 2055 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 2056
2057 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
2058 vid_present = 1;
d7c0a89a 2059 vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
d62a17ae 2060 sprintf(vid_buf, " VLAN %u", vid);
2061 }
2062
2063 if (tb[NDA_DST]) {
2064 /* TODO: Only IPv4 supported now. */
2065 dst_present = 1;
2066 vtep_ip.family = AF_INET;
2067 vtep_ip.prefixlen = IPV4_MAX_BITLEN;
2068 memcpy(&(vtep_ip.u.prefix4.s_addr), RTA_DATA(tb[NDA_DST]),
2069 IPV4_MAX_BYTELEN);
2070 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip.u.prefix4));
2071 }
2072
a37f4598 2073 sticky = !!(ndm->ndm_state & NUD_NOARP);
d62a17ae 2074
2075 if (IS_ZEBRA_DEBUG_KERNEL)
2076 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
2077 nl_msg_type_to_str(h->nlmsg_type),
2078 nl_family_to_str(ndm->ndm_family), ifp->name,
2079 ndm->ndm_ifindex, vid_present ? vid_buf : "",
2080 sticky ? "sticky " : "",
2081 prefix_mac2str(&mac, buf, sizeof(buf)),
2082 dst_present ? dst_buf : "");
2083
28bd0652
DS
2084 if (filter_vlan && vid != filter_vlan) {
2085 if (IS_ZEBRA_DEBUG_KERNEL)
2086 zlog_debug("\tFiltered due to filter vlan: %d",
2087 filter_vlan);
d62a17ae 2088 return 0;
28bd0652 2089 }
d62a17ae 2090
2091 /* If add or update, do accordingly if learnt on a "local" interface; if
2092 * the notification is over VxLAN, this has to be related to
2093 * multi-homing,
2094 * so perform an implicit delete of any local entry (if it exists).
2095 */
2096 if (h->nlmsg_type == RTM_NEWNEIGH) {
d62a17ae 2097 if (IS_ZEBRA_IF_VXLAN(ifp))
2098 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
2099 vid);
2100
2101 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
2102 sticky);
2103 }
2104
2105 /* This is a delete notification.
2106 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
2107 * 2. For a MAC over "local" interface, delete the mac
2108 * Note: We will get notifications from both bridge driver and VxLAN
2109 * driver.
2110 * Ignore the notification from VxLan driver as it is also generated
2111 * when mac moves from remote to local.
2112 */
28bd0652
DS
2113 if (dst_present) {
2114 if (IS_ZEBRA_DEBUG_KERNEL)
2115 zlog_debug("\tNo Destination Present");
d62a17ae 2116 return 0;
28bd0652 2117 }
d62a17ae 2118
2119 if (IS_ZEBRA_IF_VXLAN(ifp))
2120 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
2121 vid);
2122
2123 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
2232a77c 2124}
2125
2414abd3 2126static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 2127{
d62a17ae 2128 int len;
2129 struct ndmsg *ndm;
2232a77c 2130
d62a17ae 2131 if (h->nlmsg_type != RTM_NEWNEIGH)
2132 return 0;
2232a77c 2133
d62a17ae 2134 /* Length validity. */
2135 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2136 if (len < 0)
2137 return -1;
2232a77c 2138
d62a17ae 2139 /* We are interested only in AF_BRIDGE notifications. */
2140 ndm = NLMSG_DATA(h);
2141 if (ndm->ndm_family != AF_BRIDGE)
2142 return 0;
2232a77c 2143
2414abd3 2144 return netlink_macfdb_change(h, len, ns_id);
2232a77c 2145}
2146
2147/* Request for MAC FDB information from the kernel */
85a75f1e
MS
2148static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
2149 int type, ifindex_t master_ifindex)
2232a77c 2150{
d62a17ae 2151 struct {
2152 struct nlmsghdr n;
2153 struct ifinfomsg ifm;
2154 char buf[256];
2155 } req;
2156
2157 /* Form the request, specifying filter (rtattr) if needed. */
2158 memset(&req, 0, sizeof(req));
2159 req.n.nlmsg_type = type;
718f9b0f 2160 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 2161 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
2162 req.ifm.ifi_family = family;
2163 if (master_ifindex)
2164 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
2165
85a75f1e 2166 return netlink_request(netlink_cmd, &req.n);
2232a77c 2167}
2168
2169/*
2170 * MAC forwarding database read using netlink interface. This is invoked
2171 * at startup.
2172 */
d62a17ae 2173int netlink_macfdb_read(struct zebra_ns *zns)
2232a77c 2174{
d62a17ae 2175 int ret;
85a75f1e
MS
2176 struct zebra_dplane_info dp_info;
2177
2178 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 2179
2180 /* Get bridge FDB table. */
85a75f1e
MS
2181 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
2182 0);
d62a17ae 2183 if (ret < 0)
2184 return ret;
2185 /* We are reading entire table. */
2186 filter_vlan = 0;
85a75f1e
MS
2187 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2188 &dp_info, 0, 1);
d62a17ae 2189
2190 return ret;
2232a77c 2191}
2192
2193/*
2194 * MAC forwarding database read using netlink interface. This is for a
2195 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
2196 */
d62a17ae 2197int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
2198 struct interface *br_if)
2232a77c 2199{
d62a17ae 2200 struct zebra_if *br_zif;
2201 struct zebra_if *zif;
2202 struct zebra_l2info_vxlan *vxl;
85a75f1e 2203 struct zebra_dplane_info dp_info;
d62a17ae 2204 int ret = 0;
2205
85a75f1e 2206 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
d62a17ae 2207
2208 /* Save VLAN we're filtering on, if needed. */
2209 br_zif = (struct zebra_if *)br_if->info;
2210 zif = (struct zebra_if *)ifp->info;
2211 vxl = &zif->l2info.vxl;
2212 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
2213 filter_vlan = vxl->access_vlan;
2214
2215 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
2216 */
85a75f1e 2217 ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
d62a17ae 2218 br_if->ifindex);
2219 if (ret < 0)
2220 return ret;
85a75f1e
MS
2221 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2222 &dp_info, 0, 0);
d62a17ae 2223
2224 /* Reset VLAN filter. */
2225 filter_vlan = 0;
2226 return ret;
2232a77c 2227}
2228
67fb9374
CS
2229
2230/* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
2231static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns,
2232 int family,
2233 int type,
2234 struct interface *br_if,
2235 struct ethaddr *mac,
2236 vlanid_t vid)
2237{
2238 struct {
2239 struct nlmsghdr n;
2240 struct ndmsg ndm;
2241 char buf[256];
2242 } req;
2243 struct zebra_if *br_zif;
2244 char buf[ETHER_ADDR_STRLEN];
2245
2246 memset(&req, 0, sizeof(req));
2247 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2248 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
2249 req.n.nlmsg_flags = NLM_F_REQUEST;
2250 req.ndm.ndm_family = family; /* AF_BRIDGE */
2251 /* req.ndm.ndm_state = NUD_REACHABLE; */
2252
2253 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2254
2255 br_zif = (struct zebra_if *)br_if->info;
2256 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0)
2257 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2258
2259 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2260
2261 if (IS_ZEBRA_DEBUG_KERNEL)
2262 zlog_debug("%s: Tx family %s IF %s(%u) MAC %s vid %u",
2263 __PRETTY_FUNCTION__,
2264 nl_family_to_str(req.ndm.ndm_family), br_if->name,
2265 br_if->ifindex,
2266 prefix_mac2str(mac, buf, sizeof(buf)), vid);
2267
2268 return netlink_request(&zns->netlink_cmd, &req.n);
2269}
2270
2271int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
2272 struct interface *br_if,
2273 struct ethaddr *mac, vlanid_t vid)
2274{
2275 int ret = 0;
2276 struct zebra_dplane_info dp_info;
2277
2278 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2279
2280 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
2281 */
2282 ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE,
2283 RTM_GETNEIGH,
2284 br_if, mac, vid);
2285 if (ret < 0)
2286 return ret;
2287
2288 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
2289 &dp_info, 1, 0);
2290
2291 return ret;
2292}
036d93c0
MS
2293
2294/*
2295 * Netlink-specific handler for MAC updates using dataplane context object.
2296 */
2297static enum zebra_dplane_result
2298netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 2299{
d62a17ae 2300 struct {
2301 struct nlmsghdr n;
2302 struct ndmsg ndm;
2303 char buf[256];
2304 } req;
036d93c0 2305 int ret;
d62a17ae 2306 int dst_alen;
2307 struct zebra_if *zif;
2308 struct interface *br_if;
2309 struct zebra_if *br_zif;
36590076 2310 int vid_present = 0;
d62a17ae 2311 char vid_buf[20];
036d93c0
MS
2312 struct zebra_ns *zns;
2313 struct interface *ifp;
2314 int cmd;
2315 struct in_addr vtep_ip;
2316 vlanid_t vid;
2317
2318 if (dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL)
2319 cmd = RTM_NEWNEIGH;
2320 else
2321 cmd = RTM_DELNEIGH;
2322
2323 /* Locate zebra ns and interface objects from context data */
2324 zns = zebra_ns_lookup(dplane_ctx_get_ns(ctx)->ns_id);
2325 if (zns == NULL) {
2326 /* Nothing to be done */
2327 if (IS_ZEBRA_DEBUG_KERNEL)
2328 zlog_debug("MAC %s on IF %s(%u) - zebra ns unknown",
2329 (cmd == RTM_NEWNEIGH) ? "add" : "del",
2330 dplane_ctx_get_ifname(ctx),
2331 dplane_ctx_get_ifindex(ctx));
2332
2333 return ZEBRA_DPLANE_REQUEST_FAILURE;
2334 }
2335
2336 ifp = if_lookup_by_index_per_ns(zns, dplane_ctx_get_ifindex(ctx));
2337 if (ifp == NULL) {
2338 /* Nothing to be done */
2339 /* Nothing to be done */
2340 if (IS_ZEBRA_DEBUG_KERNEL)
2341 zlog_debug("MAC %s on IF %s(%u) - interface unknown",
2342 (cmd == RTM_NEWNEIGH) ? "add" : "del",
2343 dplane_ctx_get_ifname(ctx),
2344 dplane_ctx_get_ifindex(ctx));
2345 return ZEBRA_DPLANE_REQUEST_FAILURE;
2346 }
2347
2348 vid = dplane_ctx_mac_get_vlan(ctx);
d62a17ae 2349
2350 zif = ifp->info;
2351 if ((br_if = zif->brslave_info.br_if) == NULL) {
036d93c0
MS
2352 if (IS_ZEBRA_DEBUG_KERNEL)
2353 zlog_debug("MAC %s on IF %s(%u) - no mapping to bridge",
2354 (cmd == RTM_NEWNEIGH) ? "add" : "del",
2355 ifp->name, ifp->ifindex);
2356 return ZEBRA_DPLANE_REQUEST_FAILURE;
d62a17ae 2357 }
2358
5605ecfc 2359 memset(&req, 0, sizeof(req));
d62a17ae 2360
2361 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2362 req.n.nlmsg_flags = NLM_F_REQUEST;
2363 if (cmd == RTM_NEWNEIGH)
2364 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2365 req.n.nlmsg_type = cmd;
2366 req.ndm.ndm_family = AF_BRIDGE;
2367 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2368 req.ndm.ndm_state = NUD_REACHABLE;
2369
036d93c0 2370 if (dplane_ctx_mac_is_sticky(ctx))
d62a17ae 2371 req.ndm.ndm_state |= NUD_NOARP;
2372 else
2373 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2374
036d93c0
MS
2375 addattr_l(&req.n, sizeof(req), NDA_LLADDR,
2376 dplane_ctx_mac_get_addr(ctx), 6);
d62a17ae 2377 req.ndm.ndm_ifindex = ifp->ifindex;
036d93c0 2378
d63c1b18 2379 dst_alen = 4; // TODO: hardcoded
036d93c0 2380 vtep_ip = *(dplane_ctx_mac_get_vtep_ip(ctx));
d63c1b18 2381 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
036d93c0 2382
d62a17ae 2383 br_zif = (struct zebra_if *)br_if->info;
2384 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) {
2385 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2386 vid_present = 1;
2387 sprintf(vid_buf, " VLAN %u", vid);
2388 }
2389 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2390
036d93c0
MS
2391 if (IS_ZEBRA_DEBUG_KERNEL) {
2392 char ipbuf[PREFIX_STRLEN];
2393 char buf[ETHER_ADDR_STRLEN];
2394 char dst_buf[PREFIX_STRLEN + 10];
2395
2396 inet_ntop(AF_INET, &vtep_ip, ipbuf, sizeof(ipbuf));
2397 snprintf(dst_buf, sizeof(dst_buf), " dst %s", ipbuf);
2398 prefix_mac2str(dplane_ctx_mac_get_addr(ctx), buf, sizeof(buf));
2399
d62a17ae 2400 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2401 nl_msg_type_to_str(cmd),
2402 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2403 ifp->ifindex, vid_present ? vid_buf : "",
036d93c0
MS
2404 dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
2405 buf, dst_buf);
2406 }
d62a17ae 2407
036d93c0
MS
2408 ret = netlink_talk_info(netlink_talk_filter, &req.n,
2409 dplane_ctx_get_ns(ctx), 0);
2410 if (ret == 0)
2411 return ZEBRA_DPLANE_REQUEST_SUCCESS;
2412 else
2413 return ZEBRA_DPLANE_REQUEST_FAILURE;
2232a77c 2414}
2415
f17b99ed
DS
2416/*
2417 * In the event the kernel deletes ipv4 link-local neighbor entries created for
2418 * 5549 support, re-install them.
2419 */
2420static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
9b036974
DS
2421 struct interface *ifp, struct ipaddr *ip,
2422 bool handle_failed)
f17b99ed
DS
2423{
2424 if (ndm->ndm_family != AF_INET)
2425 return;
2426
2427 if (!zif->v6_2_v4_ll_neigh_entry)
2428 return;
2429
2430 if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
2431 return;
2432
9b036974
DS
2433 if (handle_failed && ndm->ndm_state & NUD_FAILED) {
2434 zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
2435 ifp->name);
2436 return;
2437 }
2438
f17b99ed
DS
2439 if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
2440}
2441
/* Neighbor states treated in this file as "valid for use". */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE    \
	 | NUD_DELAY)
2232a77c 2445
2414abd3 2446static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
2232a77c 2447{
d62a17ae 2448 struct ndmsg *ndm;
2449 struct interface *ifp;
2450 struct zebra_if *zif;
d62a17ae 2451 struct rtattr *tb[NDA_MAX + 1];
2452 struct interface *link_if;
2453 struct ethaddr mac;
2454 struct ipaddr ip;
2455 char buf[ETHER_ADDR_STRLEN];
2456 char buf2[INET6_ADDRSTRLEN];
2457 int mac_present = 0;
a37f4598 2458 bool is_ext;
2459 bool is_router;
d62a17ae 2460
2461 ndm = NLMSG_DATA(h);
2462
2463 /* The interface should exist. */
5895d33f 2464 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
d62a17ae 2465 ndm->ndm_ifindex);
2853fed6 2466 if (!ifp || !ifp->info)
d62a17ae 2467 return 0;
2468
20089ae2
DS
2469 zif = (struct zebra_if *)ifp->info;
2470
2471 /* Parse attributes and extract fields of interest. */
2472 memset(tb, 0, sizeof tb);
2473 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2474
2475 if (!tb[NDA_DST]) {
9df414fe
QY
2476 zlog_debug("%s family %s IF %s(%u) - no DST",
2477 nl_msg_type_to_str(h->nlmsg_type),
2478 nl_family_to_str(ndm->ndm_family), ifp->name,
2479 ndm->ndm_ifindex);
d62a17ae 2480 return 0;
20089ae2
DS
2481 }
2482
2483 memset(&ip, 0, sizeof(struct ipaddr));
2484 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
2485 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
2486
f17b99ed
DS
2487 /* if kernel deletes our rfc5549 neighbor entry, re-install it */
2488 if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
9b036974 2489 netlink_handle_5549(ndm, zif, ifp, &ip, false);
28bd0652
DS
2490 if (IS_ZEBRA_DEBUG_KERNEL)
2491 zlog_debug(
2492 "\tNeighbor Entry Received is a 5549 entry, finished");
20089ae2
DS
2493 return 0;
2494 }
d62a17ae 2495
f17b99ed 2496 /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
9b036974
DS
2497 if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
2498 netlink_handle_5549(ndm, zif, ifp, &ip, true);
f17b99ed 2499
d62a17ae 2500 /* The neighbor is present on an SVI. From this, we locate the
2501 * underlying
2502 * bridge because we're only interested in neighbors on a VxLAN bridge.
2503 * The bridge is located based on the nature of the SVI:
2504 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2505 * interface
2506 * and is linked to the bridge
2507 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2508 * inteface
2509 * itself
2510 */
2511 if (IS_ZEBRA_IF_VLAN(ifp)) {
5895d33f 2512 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
71349e03 2513 zif->link_ifindex);
d62a17ae 2514 if (!link_if)
2515 return 0;
2516 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
2517 link_if = ifp;
28bd0652
DS
2518 else {
2519 if (IS_ZEBRA_DEBUG_KERNEL)
2520 zlog_debug(
2521 "\tNeighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
d62a17ae 2522 return 0;
28bd0652 2523 }
d62a17ae 2524
d62a17ae 2525 memset(&mac, 0, sizeof(struct ethaddr));
d62a17ae 2526 if (h->nlmsg_type == RTM_NEWNEIGH) {
2527 if (tb[NDA_LLADDR]) {
ff8b7eb8 2528 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
28bd0652
DS
2529 if (IS_ZEBRA_DEBUG_KERNEL)
2530 zlog_debug(
2531 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
2532 nl_msg_type_to_str(
2533 h->nlmsg_type),
2534 nl_family_to_str(
2535 ndm->ndm_family),
2536 ifp->name, ndm->ndm_ifindex,
2537 (unsigned long)RTA_PAYLOAD(
2538 tb[NDA_LLADDR]));
d62a17ae 2539 return 0;
2540 }
2541
2542 mac_present = 1;
ff8b7eb8 2543 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
d62a17ae 2544 }
2545
a37f4598 2546 is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
2547 is_router = !!(ndm->ndm_flags & NTF_ROUTER);
d62a17ae 2548
2549 if (IS_ZEBRA_DEBUG_KERNEL)
2550 zlog_debug(
2551 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2552 nl_msg_type_to_str(h->nlmsg_type),
2553 nl_family_to_str(ndm->ndm_family), ifp->name,
2554 ndm->ndm_ifindex,
2555 ipaddr2str(&ip, buf2, sizeof(buf2)),
2556 mac_present
2557 ? prefix_mac2str(&mac, buf, sizeof(buf))
2558 : "",
2559 ndm->ndm_state, ndm->ndm_flags);
2560
2561 /* If the neighbor state is valid for use, process as an add or
2562 * update
2563 * else process as a delete. Note that the delete handling may
2564 * result
2565 * in re-adding the neighbor if it is a valid "remote" neighbor.
2566 */
2567 if (ndm->ndm_state & NUD_VALID)
ee69da27 2568 return zebra_vxlan_handle_kernel_neigh_update(
d62a17ae 2569 ifp, link_if, &ip, &mac, ndm->ndm_state,
a37f4598 2570 is_ext, is_router);
d62a17ae 2571
ee69da27 2572 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
d62a17ae 2573 }
2574
2575 if (IS_ZEBRA_DEBUG_KERNEL)
2576 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2577 nl_msg_type_to_str(h->nlmsg_type),
2578 nl_family_to_str(ndm->ndm_family), ifp->name,
2579 ndm->ndm_ifindex,
2580 ipaddr2str(&ip, buf2, sizeof(buf2)));
2581
2582 /* Process the delete - it may result in re-adding the neighbor if it is
2583 * a valid "remote" neighbor.
2584 */
ee69da27 2585 return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
2232a77c 2586}
2587
2414abd3 2588static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
2232a77c 2589{
d62a17ae 2590 int len;
2591 struct ndmsg *ndm;
2232a77c 2592
d62a17ae 2593 if (h->nlmsg_type != RTM_NEWNEIGH)
2594 return 0;
2232a77c 2595
d62a17ae 2596 /* Length validity. */
2597 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2598 if (len < 0)
2599 return -1;
2232a77c 2600
d62a17ae 2601 /* We are interested only in AF_INET or AF_INET6 notifications. */
2602 ndm = NLMSG_DATA(h);
2603 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
2604 return 0;
2232a77c 2605
2414abd3 2606 return netlink_neigh_change(h, len);
2232a77c 2607}
2608
2609/* Request for IP neighbor information from the kernel */
85a75f1e
MS
2610static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
2611 int type, ifindex_t ifindex)
2232a77c 2612{
d62a17ae 2613 struct {
2614 struct nlmsghdr n;
2615 struct ndmsg ndm;
2616 char buf[256];
2617 } req;
2618
2619 /* Form the request, specifying filter (rtattr) if needed. */
2620 memset(&req, 0, sizeof(req));
2621 req.n.nlmsg_type = type;
718f9b0f 2622 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
d62a17ae 2623 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2624 req.ndm.ndm_family = family;
2625 if (ifindex)
2626 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
2627
85a75f1e 2628 return netlink_request(netlink_cmd, &req.n);
2232a77c 2629}
2630
2631/*
2632 * IP Neighbor table read using netlink interface. This is invoked
2633 * at startup.
2634 */
d62a17ae 2635int netlink_neigh_read(struct zebra_ns *zns)
2232a77c 2636{
d62a17ae 2637 int ret;
85a75f1e
MS
2638 struct zebra_dplane_info dp_info;
2639
2640 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 2641
d62a17ae 2642 /* Get IP neighbor table. */
85a75f1e
MS
2643 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
2644 0);
d62a17ae 2645 if (ret < 0)
2646 return ret;
85a75f1e
MS
2647 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
2648 &dp_info, 0, 1);
2232a77c 2649
d62a17ae 2650 return ret;
2232a77c 2651}
2652
2653/*
2654 * IP Neighbor table read using netlink interface. This is for a specific
2655 * VLAN device.
2656 */
d62a17ae 2657int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2232a77c 2658{
d62a17ae 2659 int ret = 0;
85a75f1e
MS
2660 struct zebra_dplane_info dp_info;
2661
2662 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2232a77c 2663
85a75f1e 2664 ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
d62a17ae 2665 vlan_if->ifindex);
2666 if (ret < 0)
2667 return ret;
85a75f1e
MS
2668 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
2669 &dp_info, 0, 0);
2232a77c 2670
d62a17ae 2671 return ret;
2232a77c 2672}
2673
67fb9374
CS
2674/*
2675 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
2676 * read using netlink interface.
2677 */
2678static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
2679 int type, struct ipaddr *ip,
2680 ifindex_t ifindex)
2681{
2682 struct {
2683 struct nlmsghdr n;
2684 struct ndmsg ndm;
2685 char buf[256];
2686 } req;
2687 int ipa_len;
2688
2689 /* Form the request, specifying filter (rtattr) if needed. */
2690 memset(&req, 0, sizeof(req));
2691 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2692 req.n.nlmsg_flags = NLM_F_REQUEST;
2693 req.n.nlmsg_type = type; /* RTM_GETNEIGH */
2694 req.ndm.ndm_ifindex = ifindex;
2695
2696 if (IS_IPADDR_V4(ip)) {
2697 ipa_len = IPV4_MAX_BYTELEN;
2698 req.ndm.ndm_family = AF_INET;
2699
2700 } else {
2701 ipa_len = IPV6_MAX_BYTELEN;
2702 req.ndm.ndm_family = AF_INET6;
2703 }
2704
2705 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2706
2707 return netlink_request(&zns->netlink_cmd, &req.n);
2708}
2709
2710int netlink_neigh_read_specific_ip(struct ipaddr *ip,
2711 struct interface *vlan_if)
2712{
2713 int ret = 0;
2714 struct zebra_ns *zns;
2715 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vlan_if->vrf_id);
2716 char buf[INET6_ADDRSTRLEN];
2717 struct zebra_dplane_info dp_info;
2718
2719 zns = zvrf->zns;
2720
2721 zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
2722
2723 if (IS_ZEBRA_DEBUG_KERNEL)
2724 zlog_debug("%s: neigh request IF %s(%u) IP %s vrf_id %u",
2725 __PRETTY_FUNCTION__, vlan_if->name,
2726 vlan_if->ifindex,
2727 ipaddr2str(ip, buf, sizeof(buf)),
2728 vlan_if->vrf_id);
2729
2730 ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
2731 vlan_if->ifindex);
2732 if (ret < 0)
2733 return ret;
2734
2735 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
2736 &dp_info, 1, 0);
2737
2738 return ret;
2739}
2740
2414abd3 2741int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
2232a77c 2742{
d62a17ae 2743 int len;
2744 struct ndmsg *ndm;
2232a77c 2745
d62a17ae 2746 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
2747 return 0;
2232a77c 2748
d62a17ae 2749 /* Length validity. */
2750 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
9bdf8618
DS
2751 if (len < 0) {
2752 zlog_err("%s: Message received from netlink is of a broken size %d %zu",
2753 __PRETTY_FUNCTION__, h->nlmsg_len,
2754 (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
d62a17ae 2755 return -1;
9bdf8618 2756 }
2232a77c 2757
d62a17ae 2758 /* Is this a notification for the MAC FDB or IP neighbor table? */
2759 ndm = NLMSG_DATA(h);
2760 if (ndm->ndm_family == AF_BRIDGE)
2414abd3 2761 return netlink_macfdb_change(h, len, ns_id);
2232a77c 2762
d62a17ae 2763 if (ndm->ndm_type != RTN_UNICAST)
2764 return 0;
2232a77c 2765
d62a17ae 2766 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2414abd3 2767 return netlink_ipneigh_change(h, len, ns_id);
8a1b681c 2768 else {
9df414fe 2769 flog_warn(
e914ccbe 2770 EC_ZEBRA_UNKNOWN_FAMILY,
87b5d1b0
DS
2771 "Invalid address family: %u received from kernel neighbor change: %s",
2772 ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
8a1b681c
SW
2773 return 0;
2774 }
2232a77c 2775
d62a17ae 2776 return 0;
2232a77c 2777}
2778
931fa60c
MS
2779/*
2780 * Utility neighbor-update function, using info from dplane context.
2781 */
2782static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
2783 int cmd)
2232a77c 2784{
d62a17ae 2785 struct {
2786 struct nlmsghdr n;
2787 struct ndmsg ndm;
2788 char buf[256];
2789 } req;
2790 int ipa_len;
d62a17ae 2791 char buf[INET6_ADDRSTRLEN];
2792 char buf2[ETHER_ADDR_STRLEN];
931fa60c
MS
2793 const struct ipaddr *ip;
2794 const struct ethaddr *mac;
2795 uint8_t flags;
2796 uint16_t state;
d62a17ae 2797
5605ecfc 2798 memset(&req, 0, sizeof(req));
d62a17ae 2799
931fa60c
MS
2800 ip = dplane_ctx_neigh_get_ipaddr(ctx);
2801 mac = dplane_ctx_neigh_get_mac(ctx);
2802 if (is_zero_mac(mac))
2803 mac = NULL;
2804
2805 flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
2806 state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
2807
d62a17ae 2808 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2809 req.n.nlmsg_flags = NLM_F_REQUEST;
2810 if (cmd == RTM_NEWNEIGH)
2811 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2812 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
2813 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
68e33151 2814 req.ndm.ndm_state = state;
931fa60c 2815 req.ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
d62a17ae 2816 req.ndm.ndm_type = RTN_UNICAST;
68e33151 2817 req.ndm.ndm_flags = flags;
d62a17ae 2818
2819 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2820 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2821 if (mac)
2822 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2823
2824 if (IS_ZEBRA_DEBUG_KERNEL)
6fe2b0e6 2825 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s flags 0x%x state 0x%x",
d62a17ae 2826 nl_msg_type_to_str(cmd),
931fa60c
MS
2827 nl_family_to_str(req.ndm.ndm_family),
2828 dplane_ctx_get_ifname(ctx),
2829 dplane_ctx_get_ifindex(ctx),
2830 ipaddr2str(ip, buf, sizeof(buf)),
d62a17ae 2831 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
931fa60c
MS
2832 : "null",
2833 flags, state);
d62a17ae 2834
931fa60c
MS
2835 return netlink_talk_info(netlink_talk_filter, &req.n,
2836 dplane_ctx_get_ns(ctx), 0);
2232a77c 2837}
2838
036d93c0
MS
2839/*
2840 * Update MAC, using dataplane context object.
2841 */
2842enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 2843{
036d93c0 2844 return netlink_macfdb_update_ctx(ctx);
2232a77c 2845}
2846
931fa60c 2847enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx)
2232a77c 2848{
931fa60c 2849 int ret = -1;
2232a77c 2850
931fa60c
MS
2851 switch (dplane_ctx_get_op(ctx)) {
2852 case DPLANE_OP_NEIGH_INSTALL:
2853 case DPLANE_OP_NEIGH_UPDATE:
2854 ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH);
2855 break;
2856 case DPLANE_OP_NEIGH_DELETE:
2857 ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH);
2858 break;
0bbd4ff4
MS
2859 case DPLANE_OP_VTEP_ADD:
2860 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH);
2861 break;
2862 case DPLANE_OP_VTEP_DELETE:
2863 ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH);
2864 break;
931fa60c
MS
2865 default:
2866 break;
2867 }
2232a77c 2868
931fa60c
MS
2869 return (ret == 0 ?
2870 ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
6fe2b0e6
CS
2871}
2872
16c628de
MS
2873/*
2874 * MPLS label forwarding table change via netlink interface, using dataplane
2875 * context information.
2876 */
fc608372 2877int netlink_mpls_multipath(int cmd, struct zebra_dplane_ctx *ctx)
16c628de
MS
2878{
2879 mpls_lse_t lse;
81793ac1 2880 const zebra_nhlfe_t *nhlfe;
16c628de
MS
2881 struct nexthop *nexthop = NULL;
2882 unsigned int nexthop_num;
2883 const char *routedesc;
2884 int route_type;
2885
2886 struct {
2887 struct nlmsghdr n;
2888 struct rtmsg r;
2889 char buf[NL_PKT_BUF_SIZE];
2890 } req;
2891
2892 memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE);
2893
2894 /*
2895 * Count # nexthops so we can decide whether to use singlepath
2896 * or multipath case.
2897 */
2898 nexthop_num = 0;
2899 for (nhlfe = dplane_ctx_get_nhlfe(ctx); nhlfe; nhlfe = nhlfe->next) {
2900 nexthop = nhlfe->nexthop;
2901 if (!nexthop)
2902 continue;
2903 if (cmd == RTM_NEWROUTE) {
2904 /* Count all selected NHLFEs */
2905 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2906 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2907 nexthop_num++;
2908 } else { /* DEL */
2909 /* Count all installed NHLFEs */
2910 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
2911 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
2912 nexthop_num++;
2913 }
2914 }
2915
2916 if ((nexthop_num == 0) ||
2917 (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
2918 return 0;
2919
2920 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2921 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2922 req.n.nlmsg_type = cmd;
2923 req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
2924
2925 req.r.rtm_family = AF_MPLS;
2926 req.r.rtm_table = RT_TABLE_MAIN;
2927 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
2928 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
2929 req.r.rtm_type = RTN_UNICAST;
2930
2931 if (cmd == RTM_NEWROUTE) {
2932 /* We do a replace to handle update. */
2933 req.n.nlmsg_flags |= NLM_F_REPLACE;
2934
2935 /* set the protocol value if installing */
2936 route_type = re_type_from_lsp_type(
2937 dplane_ctx_get_best_nhlfe(ctx)->type);
2938 req.r.rtm_protocol = zebra2proto(route_type);
2939 }
2940
2941 /* Fill destination */
2942 lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
2943 addattr_l(&req.n, sizeof(req), RTA_DST, &lse, sizeof(mpls_lse_t));
2944
2945 /* Fill nexthops (paths) based on single-path or multipath. The paths
2946 * chosen depend on the operation.
2947 */
fc608372 2948 if (nexthop_num == 1) {
16c628de
MS
2949 routedesc = "single-path";
2950 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
2951 routedesc);
2952
2953 nexthop_num = 0;
2954 for (nhlfe = dplane_ctx_get_nhlfe(ctx);
2955 nhlfe; nhlfe = nhlfe->next) {
2956 nexthop = nhlfe->nexthop;
2957 if (!nexthop)
2958 continue;
2959
2960 if ((cmd == RTM_NEWROUTE
2961 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2962 && CHECK_FLAG(nexthop->flags,
2963 NEXTHOP_FLAG_ACTIVE)))
2964 || (cmd == RTM_DELROUTE
2965 && (CHECK_FLAG(nhlfe->flags,
2966 NHLFE_FLAG_INSTALLED)
2967 && CHECK_FLAG(nexthop->flags,
2968 NEXTHOP_FLAG_FIB)))) {
2969 /* Add the gateway */
2970 _netlink_mpls_build_singlepath(
2971 routedesc, nhlfe,
2972 &req.n, &req.r,
2973 sizeof(req), cmd);
2974
2975 nexthop_num++;
2976 break;
2977 }
2978 }
2979 } else { /* Multipath case */
2980 char buf[NL_PKT_BUF_SIZE];
2981 struct rtattr *rta = (void *)buf;
2982 struct rtnexthop *rtnh;
81793ac1 2983 const union g_addr *src1 = NULL;
16c628de
MS
2984
2985 rta->rta_type = RTA_MULTIPATH;
2986 rta->rta_len = RTA_LENGTH(0);
2987 rtnh = RTA_DATA(rta);
2988
2989 routedesc = "multipath";
2990 _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
2991 routedesc);
2992
2993 nexthop_num = 0;
2994 for (nhlfe = dplane_ctx_get_nhlfe(ctx);
2995 nhlfe; nhlfe = nhlfe->next) {
2996 nexthop = nhlfe->nexthop;
2997 if (!nexthop)
2998 continue;
2999
16c628de
MS
3000 if ((cmd == RTM_NEWROUTE
3001 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
3002 && CHECK_FLAG(nexthop->flags,
3003 NEXTHOP_FLAG_ACTIVE)))
3004 || (cmd == RTM_DELROUTE
3005 && (CHECK_FLAG(nhlfe->flags,
3006 NHLFE_FLAG_INSTALLED)
3007 && CHECK_FLAG(nexthop->flags,
3008 NEXTHOP_FLAG_FIB)))) {
3009 nexthop_num++;
3010
3011 /* Build the multipath */
3012 _netlink_mpls_build_multipath(routedesc, nhlfe,
3013 rta, rtnh, &req.r,
3014 &src1);
3015 rtnh = RTNH_NEXT(rtnh);
3016 }
3017 }
3018
3019 /* Add the multipath */
3020 if (rta->rta_len > RTA_LENGTH(0))
3021 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
3022 RTA_DATA(rta), RTA_PAYLOAD(rta));
3023 }
3024
3025 /* Talk to netlink socket. */
3026 return netlink_talk_info(netlink_talk_filter, &req.n,
3027 dplane_ctx_get_ns(ctx), 0);
3028}
ddfeb486 3029#endif /* HAVE_NETLINK */