]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
0221162a3091617cd9080bf7d954da5853ea213e
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 #include <net/if_arp.h>
26 #include <linux/lwtunnel.h>
27 #include <linux/mpls_iptunnel.h>
28 #include <linux/neighbour.h>
29 #include <linux/rtnetlink.h>
30
31 /* Hack for GNU libc version 2. */
32 #ifndef MSG_TRUNC
33 #define MSG_TRUNC 0x20
34 #endif /* MSG_TRUNC */
35
36 #include "linklist.h"
37 #include "if.h"
38 #include "log.h"
39 #include "prefix.h"
40 #include "connected.h"
41 #include "table.h"
42 #include "memory.h"
43 #include "zebra_memory.h"
44 #include "rib.h"
45 #include "thread.h"
46 #include "privs.h"
47 #include "nexthop.h"
48 #include "vrf.h"
49 #include "vty.h"
50 #include "mpls.h"
51 #include "vxlan.h"
52
53 #include "zebra/zserv.h"
54 #include "zebra/zebra_ns.h"
55 #include "zebra/zebra_vrf.h"
56 #include "zebra/rt.h"
57 #include "zebra/redistribute.h"
58 #include "zebra/interface.h"
59 #include "zebra/debug.h"
60 #include "zebra/rtadv.h"
61 #include "zebra/zebra_ptm.h"
62 #include "zebra/zebra_mpls.h"
63 #include "zebra/kernel_netlink.h"
64 #include "zebra/rt_netlink.h"
65 #include "zebra/zebra_mroute.h"
66 #include "zebra/zebra_vxlan.h"
67
68 #ifndef AF_MPLS
69 #define AF_MPLS 28
70 #endif
71
72 static vlanid_t filter_vlan = 0;
73
74 struct gw_family_t {
75 u_int16_t filler;
76 u_int16_t family;
77 union g_addr gate;
78 };
79
/* Textual and binary form of the IPv4 link-local gateway address. */
char ipv4_ll_buf[16] = "169.254.0.1";
struct in_addr ipv4_ll;

/*
 * Convert ipv4_ll_buf into its binary form (ipv4_ll) once.
 *
 * ipv4_ll is used as the gateway for every 5549 type route handed to
 * the kernel, so the conversion is done here a single time rather than
 * on each install/remove of such a route.
 */
void rt_netlink_init(void)
{
	inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
}
93
94 static inline int is_selfroute(int proto)
95 {
96 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
97 || (proto == RTPROT_STATIC) || (proto == RTPROT_ZEBRA)
98 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
99 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
100 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
101 || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)) {
102 return 1;
103 }
104
105 return 0;
106 }
107
108 static inline int zebra2proto(int proto)
109 {
110 switch (proto) {
111 case ZEBRA_ROUTE_BABEL:
112 proto = RTPROT_BABEL;
113 break;
114 case ZEBRA_ROUTE_BGP:
115 proto = RTPROT_BGP;
116 break;
117 case ZEBRA_ROUTE_OSPF:
118 case ZEBRA_ROUTE_OSPF6:
119 proto = RTPROT_OSPF;
120 break;
121 case ZEBRA_ROUTE_STATIC:
122 proto = RTPROT_STATIC;
123 break;
124 case ZEBRA_ROUTE_ISIS:
125 proto = RTPROT_ISIS;
126 break;
127 case ZEBRA_ROUTE_RIP:
128 proto = RTPROT_RIP;
129 break;
130 case ZEBRA_ROUTE_RIPNG:
131 proto = RTPROT_RIPNG;
132 break;
133 case ZEBRA_ROUTE_NHRP:
134 proto = RTPROT_NHRP;
135 break;
136 case ZEBRA_ROUTE_EIGRP:
137 proto = RTPROT_EIGRP;
138 break;
139 case ZEBRA_ROUTE_LDP:
140 proto = RTPROT_LDP;
141 break;
142 case ZEBRA_ROUTE_SHARP:
143 proto = RTPROT_SHARP;
144 break;
145 default:
146 proto = RTPROT_ZEBRA;
147 break;
148 }
149
150 return proto;
151 }
152
153 static inline int proto2zebra(int proto, int family)
154 {
155 switch (proto) {
156 case RTPROT_BABEL:
157 proto = ZEBRA_ROUTE_BABEL;
158 break;
159 case RTPROT_BGP:
160 proto = ZEBRA_ROUTE_BGP;
161 break;
162 case RTPROT_OSPF:
163 proto = (family == AFI_IP) ?
164 ZEBRA_ROUTE_OSPF : ZEBRA_ROUTE_OSPF6;
165 break;
166 case RTPROT_ISIS:
167 proto = ZEBRA_ROUTE_ISIS;
168 break;
169 case RTPROT_RIP:
170 proto = ZEBRA_ROUTE_RIP;
171 break;
172 case RTPROT_RIPNG:
173 proto = ZEBRA_ROUTE_RIPNG;
174 break;
175 case RTPROT_NHRP:
176 proto = ZEBRA_ROUTE_NHRP;
177 break;
178 case RTPROT_EIGRP:
179 proto = ZEBRA_ROUTE_EIGRP;
180 break;
181 case RTPROT_LDP:
182 proto = ZEBRA_ROUTE_LDP;
183 break;
184 case RTPROT_STATIC:
185 proto = ZEBRA_ROUTE_STATIC;
186 break;
187 default:
188 proto = ZEBRA_ROUTE_KERNEL;
189 break;
190 }
191 return proto;
192 }
193
194 /*
195 Pending: create an efficient table_id (in a tree/hash) based lookup)
196 */
197 static vrf_id_t vrf_lookup_by_table(u_int32_t table_id)
198 {
199 struct vrf *vrf;
200 struct zebra_vrf *zvrf;
201
202 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
203 if ((zvrf = vrf->info) == NULL || (zvrf->table_id != table_id))
204 continue;
205
206 return zvrf_id(zvrf);
207 }
208
209 return VRF_DEFAULT;
210 }
211
212 /* Looking up routing table by netlink interface. */
213 static int netlink_route_change_read_unicast(struct sockaddr_nl *snl,
214 struct nlmsghdr *h, ns_id_t ns_id,
215 int startup)
216 {
217 int len;
218 struct rtmsg *rtm;
219 struct rtattr *tb[RTA_MAX + 1];
220 u_char flags = 0;
221 struct prefix p;
222 struct prefix_ipv6 src_p = {};
223 vrf_id_t vrf_id = VRF_DEFAULT;
224
225 char anyaddr[16] = {0};
226
227 int proto = ZEBRA_ROUTE_KERNEL;
228 int index = 0;
229 int table;
230 int metric = 0;
231 u_int32_t mtu = 0;
232 uint8_t distance = 0;
233 route_tag_t tag = 0;
234
235 void *dest = NULL;
236 void *gate = NULL;
237 void *prefsrc = NULL; /* IPv4 preferred source host address */
238 void *src = NULL; /* IPv6 srcdest source prefix */
239 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
240
241 rtm = NLMSG_DATA(h);
242
243 if (startup && h->nlmsg_type != RTM_NEWROUTE)
244 return 0;
245 switch (rtm->rtm_type) {
246 case RTN_UNICAST:
247 break;
248 case RTN_BLACKHOLE:
249 bh_type = BLACKHOLE_NULL;
250 break;
251 case RTN_UNREACHABLE:
252 bh_type = BLACKHOLE_REJECT;
253 break;
254 case RTN_PROHIBIT:
255 bh_type = BLACKHOLE_ADMINPROHIB;
256 break;
257 default:
258 return 0;
259 }
260
261 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
262 if (len < 0)
263 return -1;
264
265 memset(tb, 0, sizeof tb);
266 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
267
268 if (rtm->rtm_flags & RTM_F_CLONED)
269 return 0;
270 if (rtm->rtm_protocol == RTPROT_REDIRECT)
271 return 0;
272 if (rtm->rtm_protocol == RTPROT_KERNEL)
273 return 0;
274
275 if (!startup && is_selfroute(rtm->rtm_protocol)
276 && h->nlmsg_type == RTM_NEWROUTE)
277 return 0;
278
279 /* We don't care about change notifications for the MPLS table. */
280 /* TODO: Revisit this. */
281 if (rtm->rtm_family == AF_MPLS)
282 return 0;
283
284 /* Table corresponding to route. */
285 if (tb[RTA_TABLE])
286 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
287 else
288 table = rtm->rtm_table;
289
290 /* Map to VRF */
291 vrf_id = vrf_lookup_by_table(table);
292 if (vrf_id == VRF_DEFAULT) {
293 if (!is_zebra_valid_kernel_table(table)
294 && !is_zebra_main_routing_table(table))
295 return 0;
296 }
297
298 /* Route which inserted by Zebra. */
299 if (is_selfroute(rtm->rtm_protocol)) {
300 flags |= ZEBRA_FLAG_SELFROUTE;
301 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
302 }
303 if (tb[RTA_OIF])
304 index = *(int *)RTA_DATA(tb[RTA_OIF]);
305
306 if (tb[RTA_DST])
307 dest = RTA_DATA(tb[RTA_DST]);
308 else
309 dest = anyaddr;
310
311 if (tb[RTA_SRC])
312 src = RTA_DATA(tb[RTA_SRC]);
313 else
314 src = anyaddr;
315
316 if (tb[RTA_PREFSRC])
317 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
318
319 if (tb[RTA_GATEWAY])
320 gate = RTA_DATA(tb[RTA_GATEWAY]);
321
322 if (tb[RTA_PRIORITY])
323 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
324
325 #if defined(SUPPORT_REALMS)
326 if (tb[RTA_FLOW])
327 tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
328 #endif
329
330 if (tb[RTA_METRICS]) {
331 struct rtattr *mxrta[RTAX_MAX + 1];
332
333 memset(mxrta, 0, sizeof mxrta);
334 netlink_parse_rtattr(mxrta, RTAX_MAX,
335 RTA_DATA(tb[RTA_METRICS]),
336 RTA_PAYLOAD(tb[RTA_METRICS]));
337
338 if (mxrta[RTAX_MTU])
339 mtu = *(u_int32_t *)RTA_DATA(mxrta[RTAX_MTU]);
340 }
341
342 if (rtm->rtm_family == AF_INET) {
343 p.family = AF_INET;
344 memcpy(&p.u.prefix4, dest, 4);
345 p.prefixlen = rtm->rtm_dst_len;
346
347 src_p.prefixlen =
348 0; // Forces debug below to not display anything
349 } else if (rtm->rtm_family == AF_INET6) {
350 p.family = AF_INET6;
351 memcpy(&p.u.prefix6, dest, 16);
352 p.prefixlen = rtm->rtm_dst_len;
353
354 src_p.family = AF_INET6;
355 memcpy(&src_p.prefix, src, 16);
356 src_p.prefixlen = rtm->rtm_src_len;
357 }
358
359 if (rtm->rtm_src_len != 0) {
360 char buf[PREFIX_STRLEN];
361 zlog_warn(
362 "unsupported IPv[4|6] sourcedest route (dest %s vrf %u)",
363 prefix2str(&p, buf, sizeof(buf)), vrf_id);
364 return 0;
365 }
366
367 /*
368 * For ZEBRA_ROUTE_KERNEL types:
369 *
370 * The metric/priority of the route received from the kernel
371 * is a 32 bit number. We are going to interpret the high
372 * order byte as the Admin Distance and the low order 3 bytes
373 * as the metric.
374 *
375 * This will allow us to do two things:
376 * 1) Allow the creation of kernel routes that can be
377 * overridden by zebra.
378 * 2) Allow the old behavior for 'most' kernel route types
379 * if a user enters 'ip route ...' v4 routes get a metric
380 * of 0 and v6 routes get a metric of 1024. Both of these
381 * values will end up with a admin distance of 0, which
382 * will cause them to win for the purposes of zebra.
383 */
384 if (proto == ZEBRA_ROUTE_KERNEL) {
385 distance = (metric >> 24) & 0xFF;
386 metric = (metric & 0x00FFFFFF);
387 }
388
389 if (IS_ZEBRA_DEBUG_KERNEL) {
390 char buf[PREFIX_STRLEN];
391 char buf2[PREFIX_STRLEN];
392 zlog_debug(
393 "%s %s%s%s vrf %u metric: %d Admin Distance: %d", nl_msg_type_to_str(h->nlmsg_type),
394 prefix2str(&p, buf, sizeof(buf)),
395 src_p.prefixlen ? " from " : "",
396 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
397 : "",
398 vrf_id, metric, distance);
399 }
400
401 afi_t afi = AFI_IP;
402 if (rtm->rtm_family == AF_INET6)
403 afi = AFI_IP6;
404
405 if (h->nlmsg_type == RTM_NEWROUTE) {
406 struct interface *ifp;
407 vrf_id_t nh_vrf_id = vrf_id;
408
409 if (!tb[RTA_MULTIPATH]) {
410 struct nexthop nh;
411 size_t sz = (afi == AFI_IP) ? 4 : 16;
412
413 memset(&nh, 0, sizeof(nh));
414
415 if (bh_type == BLACKHOLE_UNSPEC) {
416 if (index && !gate)
417 nh.type = NEXTHOP_TYPE_IFINDEX;
418 else if (index && gate)
419 nh.type = (afi == AFI_IP)
420 ? NEXTHOP_TYPE_IPV4_IFINDEX
421 : NEXTHOP_TYPE_IPV6_IFINDEX;
422 else if (!index && gate)
423 nh.type = (afi == AFI_IP)
424 ? NEXTHOP_TYPE_IPV4
425 : NEXTHOP_TYPE_IPV6;
426 else {
427 nh.type = NEXTHOP_TYPE_BLACKHOLE;
428 nh.bh_type = bh_type;
429 }
430 } else {
431 nh.type = NEXTHOP_TYPE_BLACKHOLE;
432 nh.bh_type = bh_type;
433 }
434 nh.ifindex = index;
435 if (prefsrc)
436 memcpy(&nh.src, prefsrc, sz);
437 if (gate)
438 memcpy(&nh.gate, gate, sz);
439
440 if (index) {
441 ifp = if_lookup_by_index(index,
442 VRF_UNKNOWN);
443 if (ifp)
444 nh_vrf_id = ifp->vrf_id;
445 }
446
447 rib_add(afi, SAFI_UNICAST, vrf_id, nh_vrf_id, proto,
448 0, flags, &p, NULL, &nh, table, metric,
449 mtu, distance, tag);
450 } else {
451 /* This is a multipath route */
452
453 struct route_entry *re;
454 struct rtnexthop *rtnh =
455 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
456
457 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
458
459 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
460 re->type = proto;
461 re->distance = distance;
462 re->flags = flags;
463 re->metric = metric;
464 re->mtu = mtu;
465 re->vrf_id = vrf_id;
466 re->nh_vrf_id = vrf_id;
467 re->table = table;
468 re->nexthop_num = 0;
469 re->uptime = time(NULL);
470 re->tag = tag;
471
472 for (;;) {
473 if (len < (int)sizeof(*rtnh)
474 || rtnh->rtnh_len > len)
475 break;
476
477 index = rtnh->rtnh_ifindex;
478 if (index) {
479 /*
480 * Yes we are looking this up
481 * for every nexthop and just
482 * using the last one looked
483 * up right now
484 */
485 ifp = if_lookup_by_index(index,
486 VRF_UNKNOWN);
487 if (ifp)
488 re->nh_vrf_id = ifp->vrf_id;
489 }
490 gate = 0;
491 if (rtnh->rtnh_len > sizeof(*rtnh)) {
492 memset(tb, 0, sizeof(tb));
493 netlink_parse_rtattr(
494 tb, RTA_MAX, RTNH_DATA(rtnh),
495 rtnh->rtnh_len - sizeof(*rtnh));
496 if (tb[RTA_GATEWAY])
497 gate = RTA_DATA(
498 tb[RTA_GATEWAY]);
499 }
500
501 if (gate) {
502 if (rtm->rtm_family == AF_INET) {
503 if (index)
504 route_entry_nexthop_ipv4_ifindex_add(
505 re, gate,
506 prefsrc, index);
507 else
508 route_entry_nexthop_ipv4_add(
509 re, gate,
510 prefsrc);
511 } else if (rtm->rtm_family
512 == AF_INET6) {
513 if (index)
514 route_entry_nexthop_ipv6_ifindex_add(
515 re, gate,
516 index);
517 else
518 route_entry_nexthop_ipv6_add(
519 re, gate);
520 }
521 } else
522 route_entry_nexthop_ifindex_add(re,
523 index);
524
525 len -= NLMSG_ALIGN(rtnh->rtnh_len);
526 rtnh = RTNH_NEXT(rtnh);
527 }
528
529 zserv_nexthop_num_warn(__func__,
530 (const struct prefix *)&p,
531 re->nexthop_num);
532 if (re->nexthop_num == 0)
533 XFREE(MTYPE_RE, re);
534 else
535 rib_add_multipath(afi, SAFI_UNICAST, &p,
536 NULL, re);
537 }
538 } else {
539 if (!tb[RTA_MULTIPATH]) {
540 struct nexthop nh;
541 size_t sz = (afi == AFI_IP) ? 4 : 16;
542
543 memset(&nh, 0, sizeof(nh));
544 if (bh_type == BLACKHOLE_UNSPEC) {
545 if (index && !gate)
546 nh.type = NEXTHOP_TYPE_IFINDEX;
547 else if (index && gate)
548 nh.type =
549 (afi == AFI_IP)
550 ? NEXTHOP_TYPE_IPV4_IFINDEX
551 : NEXTHOP_TYPE_IPV6_IFINDEX;
552 else if (!index && gate)
553 nh.type = (afi == AFI_IP)
554 ? NEXTHOP_TYPE_IPV4
555 : NEXTHOP_TYPE_IPV6;
556 else {
557 nh.type = NEXTHOP_TYPE_BLACKHOLE;
558 nh.bh_type = BLACKHOLE_UNSPEC;
559 }
560 } else {
561 nh.type = NEXTHOP_TYPE_BLACKHOLE;
562 nh.bh_type = bh_type;
563 }
564 nh.ifindex = index;
565 if (gate)
566 memcpy(&nh.gate, gate, sz);
567 rib_delete(afi, SAFI_UNICAST, vrf_id,
568 proto, 0, flags, &p, NULL, &nh,
569 table, metric, true, NULL);
570 } else {
571 /* XXX: need to compare the entire list of nexthops
572 * here for NLM_F_APPEND stupidity */
573 rib_delete(afi, SAFI_UNICAST, vrf_id,
574 proto, 0, flags, &p, NULL, NULL,
575 table, metric, true, NULL);
576 }
577 }
578
579 return 0;
580 }
581
582 static struct mcast_route_data *mroute = NULL;
583
584 static int netlink_route_change_read_multicast(struct sockaddr_nl *snl,
585 struct nlmsghdr *h,
586 ns_id_t ns_id, int startup)
587 {
588 int len;
589 struct rtmsg *rtm;
590 struct rtattr *tb[RTA_MAX + 1];
591 struct mcast_route_data *m;
592 struct mcast_route_data mr;
593 int iif = 0;
594 int count;
595 int oif[256];
596 int oif_count = 0;
597 char sbuf[40];
598 char gbuf[40];
599 char oif_list[256] = "\0";
600 vrf_id_t vrf = ns_id;
601 int table;
602
603 if (mroute)
604 m = mroute;
605 else {
606 memset(&mr, 0, sizeof(mr));
607 m = &mr;
608 }
609
610 rtm = NLMSG_DATA(h);
611
612 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
613
614 memset(tb, 0, sizeof tb);
615 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
616
617 if (tb[RTA_TABLE])
618 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
619 else
620 table = rtm->rtm_table;
621
622 vrf = vrf_lookup_by_table(table);
623
624 if (tb[RTA_IIF])
625 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
626
627 if (tb[RTA_SRC])
628 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
629
630 if (tb[RTA_DST])
631 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
632
633 if ((RTA_EXPIRES <= RTA_MAX) && tb[RTA_EXPIRES])
634 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
635
636 if (tb[RTA_MULTIPATH]) {
637 struct rtnexthop *rtnh =
638 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
639
640 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
641 for (;;) {
642 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
643 break;
644
645 oif[oif_count] = rtnh->rtnh_ifindex;
646 oif_count++;
647
648 len -= NLMSG_ALIGN(rtnh->rtnh_len);
649 rtnh = RTNH_NEXT(rtnh);
650 }
651 }
652
653 if (IS_ZEBRA_DEBUG_KERNEL) {
654 struct interface *ifp;
655 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
656 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
657 for (count = 0; count < oif_count; count++) {
658 ifp = if_lookup_by_index(oif[count], vrf);
659 char temp[256];
660
661 sprintf(temp, "%s ", ifp->name);
662 strcat(oif_list, temp);
663 }
664 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vrf);
665 ifp = if_lookup_by_index(iif, vrf);
666 zlog_debug(
667 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s OIF: %s jiffies: %lld",
668 zvrf->vrf->name, vrf, nl_msg_type_to_str(h->nlmsg_type),
669 sbuf, gbuf, ifp->name, oif_list, m->lastused);
670 }
671 return 0;
672 }
673
674 int netlink_route_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
675 ns_id_t ns_id, int startup)
676 {
677 int len;
678 vrf_id_t vrf_id = ns_id;
679 struct rtmsg *rtm;
680
681 rtm = NLMSG_DATA(h);
682
683 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
684 /* If this is not route add/delete message print warning. */
685 zlog_warn("Kernel message: %d vrf %u\n", h->nlmsg_type, vrf_id);
686 return 0;
687 }
688
689 /* Connected route. */
690 if (IS_ZEBRA_DEBUG_KERNEL)
691 zlog_debug("%s %s %s proto %s vrf %u",
692 nl_msg_type_to_str(h->nlmsg_type),
693 nl_family_to_str(rtm->rtm_family),
694 nl_rttype_to_str(rtm->rtm_type),
695 nl_rtproto_to_str(rtm->rtm_protocol), vrf_id);
696
697 /* We don't care about change notifications for the MPLS table. */
698 /* TODO: Revisit this. */
699 if (rtm->rtm_family == AF_MPLS)
700 return 0;
701
702 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
703 if (len < 0)
704 return -1;
705
706 if (rtm->rtm_type == RTN_MULTICAST)
707 netlink_route_change_read_multicast(snl, h, ns_id, startup);
708 else
709 netlink_route_change_read_unicast(snl, h, ns_id, startup);
710 return 0;
711 }
712
713 /* Request for specific route information from the kernel */
714 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
715 {
716 struct {
717 struct nlmsghdr n;
718 struct rtmsg rtm;
719 } req;
720
721 /* Form the request, specifying filter (rtattr) if needed. */
722 memset(&req, 0, sizeof(req));
723 req.n.nlmsg_type = type;
724 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
725 req.rtm.rtm_family = family;
726
727 return netlink_request(&zns->netlink_cmd, &req.n);
728 }
729
730 /* Routing table read function using netlink interface. Only called
731 bootstrap time. */
732 int netlink_route_read(struct zebra_ns *zns)
733 {
734 int ret;
735
736 /* Get IPv4 routing table. */
737 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
738 if (ret < 0)
739 return ret;
740 ret = netlink_parse_info(netlink_route_change_read_unicast,
741 &zns->netlink_cmd, zns, 0, 1);
742 if (ret < 0)
743 return ret;
744
745 /* Get IPv6 routing table. */
746 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
747 if (ret < 0)
748 return ret;
749 ret = netlink_parse_info(netlink_route_change_read_unicast,
750 &zns->netlink_cmd, zns, 0, 1);
751 if (ret < 0)
752 return ret;
753
754 return 0;
755 }
756
757 static void _netlink_route_nl_add_gateway_info(u_char route_family,
758 u_char gw_family,
759 struct nlmsghdr *nlmsg,
760 size_t req_size, int bytelen,
761 struct nexthop *nexthop)
762 {
763 if (route_family == AF_MPLS) {
764 struct gw_family_t gw_fam;
765
766 gw_fam.family = gw_family;
767 if (gw_family == AF_INET)
768 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
769 else
770 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
771 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
772 bytelen + 2);
773 } else {
774 if (gw_family == AF_INET)
775 addattr_l(nlmsg, req_size, RTA_GATEWAY,
776 &nexthop->gate.ipv4, bytelen);
777 else
778 addattr_l(nlmsg, req_size, RTA_GATEWAY,
779 &nexthop->gate.ipv6, bytelen);
780 }
781 }
782
783 static void _netlink_route_rta_add_gateway_info(u_char route_family,
784 u_char gw_family,
785 struct rtattr *rta,
786 struct rtnexthop *rtnh,
787 size_t req_size, int bytelen,
788 struct nexthop *nexthop)
789 {
790 if (route_family == AF_MPLS) {
791 struct gw_family_t gw_fam;
792
793 gw_fam.family = gw_family;
794 if (gw_family == AF_INET)
795 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
796 else
797 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
798 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
799 bytelen + 2);
800 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
801 } else {
802 if (gw_family == AF_INET)
803 rta_addattr_l(rta, req_size, RTA_GATEWAY,
804 &nexthop->gate.ipv4, bytelen);
805 else
806 rta_addattr_l(rta, req_size, RTA_GATEWAY,
807 &nexthop->gate.ipv6, bytelen);
808 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
809 }
810 }
811
812 /* This function takes a nexthop as argument and adds
813 * the appropriate netlink attributes to an existing
814 * netlink message.
815 *
816 * @param routedesc: Human readable description of route type
817 * (direct/recursive, single-/multipath)
818 * @param bytelen: Length of addresses in bytes.
819 * @param nexthop: Nexthop information
820 * @param nlmsg: nlmsghdr structure to fill in.
821 * @param req_size: The size allocated for the message.
822 */
823 static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
824 struct nexthop *nexthop,
825 struct nlmsghdr *nlmsg,
826 struct rtmsg *rtmsg,
827 size_t req_size, int cmd)
828 {
829 struct mpls_label_stack *nh_label;
830 mpls_lse_t out_lse[MPLS_MAX_LABELS];
831 char label_buf[256];
832
833 /*
834 * label_buf is *only* currently used within debugging.
835 * As such when we assign it we are guarding it inside
836 * a debug test. If you want to change this make sure
837 * you fix this assumption
838 */
839 label_buf[0] = '\0';
840 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
841 * (in the case of LER)
842 */
843 nh_label = nexthop->nh_label;
844 if (rtmsg->rtm_family == AF_MPLS) {
845 assert(nh_label);
846 assert(nh_label->num_labels == 1);
847 }
848
849 if (nh_label && nh_label->num_labels) {
850 int i, num_labels = 0;
851 u_int32_t bos;
852 char label_buf1[20];
853
854 for (i = 0; i < nh_label->num_labels; i++) {
855 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
856 bos = ((i == (nh_label->num_labels - 1)) ? 1
857 : 0);
858 out_lse[i] = mpls_lse_encode(nh_label->label[i],
859 0, 0, bos);
860 if (IS_ZEBRA_DEBUG_KERNEL) {
861 if (!num_labels)
862 sprintf(label_buf, "label %u",
863 nh_label->label[i]);
864 else {
865 sprintf(label_buf1, "/%u",
866 nh_label->label[i]);
867 strlcat(label_buf, label_buf1,
868 sizeof(label_buf));
869 }
870 }
871 num_labels++;
872 }
873 }
874 if (num_labels) {
875 if (rtmsg->rtm_family == AF_MPLS)
876 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
877 num_labels * sizeof(mpls_lse_t));
878 else {
879 struct rtattr *nest;
880 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
881
882 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE,
883 &encap, sizeof(u_int16_t));
884 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
885 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST,
886 &out_lse,
887 num_labels * sizeof(mpls_lse_t));
888 addattr_nest_end(nlmsg, nest);
889 }
890 }
891 }
892
893 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
894 rtmsg->rtm_flags |= RTNH_F_ONLINK;
895
896 if (rtmsg->rtm_family == AF_INET
897 && (nexthop->type == NEXTHOP_TYPE_IPV6
898 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
899 rtmsg->rtm_flags |= RTNH_F_ONLINK;
900 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
901 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
902
903 if (nexthop->rmap_src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
904 addattr_l(nlmsg, req_size, RTA_PREFSRC,
905 &nexthop->rmap_src.ipv4, bytelen);
906 else if (nexthop->src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
907 addattr_l(nlmsg, req_size, RTA_PREFSRC,
908 &nexthop->src.ipv4, bytelen);
909
910 if (IS_ZEBRA_DEBUG_KERNEL)
911 zlog_debug(
912 " 5549: _netlink_route_build_singlepath() (%s): "
913 "nexthop via %s %s if %u",
914 routedesc, ipv4_ll_buf, label_buf,
915 nexthop->ifindex);
916 return;
917 }
918
919 if (nexthop->type == NEXTHOP_TYPE_IPV4
920 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
921 /* Send deletes to the kernel without specifying the next-hop */
922 if (cmd != RTM_DELROUTE)
923 _netlink_route_nl_add_gateway_info(
924 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
925 bytelen, nexthop);
926
927 if (cmd == RTM_NEWROUTE) {
928 if (nexthop->rmap_src.ipv4.s_addr)
929 addattr_l(nlmsg, req_size, RTA_PREFSRC,
930 &nexthop->rmap_src.ipv4, bytelen);
931 else if (nexthop->src.ipv4.s_addr)
932 addattr_l(nlmsg, req_size, RTA_PREFSRC,
933 &nexthop->src.ipv4, bytelen);
934 }
935
936 if (IS_ZEBRA_DEBUG_KERNEL)
937 zlog_debug(
938 "netlink_route_multipath() (%s): "
939 "nexthop via %s %s if %u",
940 routedesc, inet_ntoa(nexthop->gate.ipv4),
941 label_buf, nexthop->ifindex);
942 }
943
944 if (nexthop->type == NEXTHOP_TYPE_IPV6
945 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
946 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
947 nlmsg, req_size, bytelen,
948 nexthop);
949
950 if (cmd == RTM_NEWROUTE) {
951 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
952 addattr_l(nlmsg, req_size, RTA_PREFSRC,
953 &nexthop->rmap_src.ipv6, bytelen);
954 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
955 addattr_l(nlmsg, req_size, RTA_PREFSRC,
956 &nexthop->src.ipv6, bytelen);
957 }
958
959 if (IS_ZEBRA_DEBUG_KERNEL)
960 zlog_debug(
961 "netlink_route_multipath() (%s): "
962 "nexthop via %s %s if %u",
963 routedesc, inet6_ntoa(nexthop->gate.ipv6),
964 label_buf, nexthop->ifindex);
965 }
966
967 /*
968 * We have the ifindex so we should always send it
969 * This is especially useful if we are doing route
970 * leaking.
971 */
972 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
973 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
974
975 if (nexthop->type == NEXTHOP_TYPE_IFINDEX
976 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
977 if (cmd == RTM_NEWROUTE) {
978 if (nexthop->rmap_src.ipv4.s_addr)
979 addattr_l(nlmsg, req_size, RTA_PREFSRC,
980 &nexthop->rmap_src.ipv4, bytelen);
981 else if (nexthop->src.ipv4.s_addr)
982 addattr_l(nlmsg, req_size, RTA_PREFSRC,
983 &nexthop->src.ipv4, bytelen);
984 }
985
986 if (IS_ZEBRA_DEBUG_KERNEL)
987 zlog_debug(
988 "netlink_route_multipath() (%s): "
989 "nexthop via if %u",
990 routedesc, nexthop->ifindex);
991 }
992
993 if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
994 if (cmd == RTM_NEWROUTE) {
995 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
996 addattr_l(nlmsg, req_size, RTA_PREFSRC,
997 &nexthop->rmap_src.ipv6, bytelen);
998 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
999 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1000 &nexthop->src.ipv6, bytelen);
1001 }
1002
1003 if (IS_ZEBRA_DEBUG_KERNEL)
1004 zlog_debug(
1005 "netlink_route_multipath() (%s): "
1006 "nexthop via if %u",
1007 routedesc, nexthop->ifindex);
1008 }
1009 }
1010
1011 /* This function takes a nexthop as argument and
1012 * appends to the given rtattr/rtnexthop pair the
1013 * representation of the nexthop. If the nexthop
1014 * defines a preferred source, the src parameter
1015 * will be modified to point to that src, otherwise
1016 * it will be kept unmodified.
1017 *
1018 * @param routedesc: Human readable description of route type
1019 * (direct/recursive, single-/multipath)
1020 * @param bytelen: Length of addresses in bytes.
1021 * @param nexthop: Nexthop information
1022 * @param rta: rtnetlink attribute structure
1023 * @param rtnh: pointer to an rtnetlink nexthop structure
1024 * @param src: pointer pointing to a location where
1025 * the prefsrc should be stored.
1026 */
1027 static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
1028 struct nexthop *nexthop,
1029 struct rtattr *rta,
1030 struct rtnexthop *rtnh,
1031 struct rtmsg *rtmsg,
1032 union g_addr **src)
1033 {
1034 struct mpls_label_stack *nh_label;
1035 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1036 char label_buf[256];
1037
1038 rtnh->rtnh_len = sizeof(*rtnh);
1039 rtnh->rtnh_flags = 0;
1040 rtnh->rtnh_hops = 0;
1041 rta->rta_len += rtnh->rtnh_len;
1042
1043 /*
1044 * label_buf is *only* currently used within debugging.
1045 * As such when we assign it we are guarding it inside
1046 * a debug test. If you want to change this make sure
1047 * you fix this assumption
1048 */
1049 label_buf[0] = '\0';
1050 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
1051 * (in the case of LER)
1052 */
1053 nh_label = nexthop->nh_label;
1054 if (rtmsg->rtm_family == AF_MPLS) {
1055 assert(nh_label);
1056 assert(nh_label->num_labels == 1);
1057 }
1058
1059 if (nh_label && nh_label->num_labels) {
1060 int i, num_labels = 0;
1061 u_int32_t bos;
1062 char label_buf1[20];
1063
1064 for (i = 0; i < nh_label->num_labels; i++) {
1065 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
1066 bos = ((i == (nh_label->num_labels - 1)) ? 1
1067 : 0);
1068 out_lse[i] = mpls_lse_encode(nh_label->label[i],
1069 0, 0, bos);
1070 if (IS_ZEBRA_DEBUG_KERNEL) {
1071 if (!num_labels)
1072 sprintf(label_buf, "label %u",
1073 nh_label->label[i]);
1074 else {
1075 sprintf(label_buf1, "/%u",
1076 nh_label->label[i]);
1077 strlcat(label_buf, label_buf1,
1078 sizeof(label_buf));
1079 }
1080 }
1081 num_labels++;
1082 }
1083 }
1084 if (num_labels) {
1085 if (rtmsg->rtm_family == AF_MPLS) {
1086 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1087 &out_lse,
1088 num_labels * sizeof(mpls_lse_t));
1089 rtnh->rtnh_len += RTA_LENGTH(
1090 num_labels * sizeof(mpls_lse_t));
1091 } else {
1092 struct rtattr *nest;
1093 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
1094 int len = rta->rta_len;
1095
1096 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1097 RTA_ENCAP_TYPE, &encap,
1098 sizeof(u_int16_t));
1099 nest = rta_nest(rta, NL_PKT_BUF_SIZE,
1100 RTA_ENCAP);
1101 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1102 MPLS_IPTUNNEL_DST, &out_lse,
1103 num_labels * sizeof(mpls_lse_t));
1104 rta_nest_end(rta, nest);
1105 rtnh->rtnh_len += rta->rta_len - len;
1106 }
1107 }
1108 }
1109
1110 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1111 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1112
1113 if (rtmsg->rtm_family == AF_INET
1114 && (nexthop->type == NEXTHOP_TYPE_IPV6
1115 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1116 bytelen = 4;
1117 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1118 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1119 bytelen);
1120 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1121 rtnh->rtnh_ifindex = nexthop->ifindex;
1122
1123 if (nexthop->rmap_src.ipv4.s_addr)
1124 *src = &nexthop->rmap_src;
1125 else if (nexthop->src.ipv4.s_addr)
1126 *src = &nexthop->src;
1127
1128 if (IS_ZEBRA_DEBUG_KERNEL)
1129 zlog_debug(
1130 " 5549: netlink_route_build_multipath() (%s): "
1131 "nexthop via %s %s if %u",
1132 routedesc, ipv4_ll_buf, label_buf,
1133 nexthop->ifindex);
1134 return;
1135 }
1136
1137 if (nexthop->type == NEXTHOP_TYPE_IPV4
1138 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1139 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1140 rta, rtnh, NL_PKT_BUF_SIZE,
1141 bytelen, nexthop);
1142 if (nexthop->rmap_src.ipv4.s_addr)
1143 *src = &nexthop->rmap_src;
1144 else if (nexthop->src.ipv4.s_addr)
1145 *src = &nexthop->src;
1146
1147 if (IS_ZEBRA_DEBUG_KERNEL)
1148 zlog_debug(
1149 "netlink_route_multipath() (%s): "
1150 "nexthop via %s %s if %u",
1151 routedesc, inet_ntoa(nexthop->gate.ipv4),
1152 label_buf, nexthop->ifindex);
1153 }
1154 if (nexthop->type == NEXTHOP_TYPE_IPV6
1155 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1156 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1157 rta, rtnh, NL_PKT_BUF_SIZE,
1158 bytelen, nexthop);
1159
1160 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1161 *src = &nexthop->rmap_src;
1162 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1163 *src = &nexthop->src;
1164
1165 if (IS_ZEBRA_DEBUG_KERNEL)
1166 zlog_debug(
1167 "netlink_route_multipath() (%s): "
1168 "nexthop via %s %s if %u",
1169 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1170 label_buf, nexthop->ifindex);
1171 }
1172
1173 /*
1174 * We have figured out the ifindex so we should always send it
1175 * This is especially useful if we are doing route
1176 * leaking.
1177 */
1178 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1179 rtnh->rtnh_ifindex = nexthop->ifindex;
1180
1181 /* ifindex */
1182 if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
1183 || nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1184 if (nexthop->rmap_src.ipv4.s_addr)
1185 *src = &nexthop->rmap_src;
1186 else if (nexthop->src.ipv4.s_addr)
1187 *src = &nexthop->src;
1188
1189 if (IS_ZEBRA_DEBUG_KERNEL)
1190 zlog_debug(
1191 "netlink_route_multipath() (%s): "
1192 "nexthop via if %u",
1193 routedesc, nexthop->ifindex);
1194 } else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1195 if (IS_ZEBRA_DEBUG_KERNEL)
1196 zlog_debug(
1197 "netlink_route_multipath() (%s): "
1198 "nexthop via if %u",
1199 routedesc, nexthop->ifindex);
1200 } else {
1201 rtnh->rtnh_ifindex = 0;
1202 }
1203 }
1204
1205 static inline void _netlink_mpls_build_singlepath(const char *routedesc,
1206 zebra_nhlfe_t *nhlfe,
1207 struct nlmsghdr *nlmsg,
1208 struct rtmsg *rtmsg,
1209 size_t req_size, int cmd)
1210 {
1211 int bytelen;
1212 u_char family;
1213
1214 family = NHLFE_FAMILY(nhlfe);
1215 bytelen = (family == AF_INET ? 4 : 16);
1216 _netlink_route_build_singlepath(routedesc, bytelen, nhlfe->nexthop,
1217 nlmsg, rtmsg, req_size, cmd);
1218 }
1219
1220
1221 static inline void
1222 _netlink_mpls_build_multipath(const char *routedesc, zebra_nhlfe_t *nhlfe,
1223 struct rtattr *rta, struct rtnexthop *rtnh,
1224 struct rtmsg *rtmsg, union g_addr **src)
1225 {
1226 int bytelen;
1227 u_char family;
1228
1229 family = NHLFE_FAMILY(nhlfe);
1230 bytelen = (family == AF_INET ? 4 : 16);
1231 _netlink_route_build_multipath(routedesc, bytelen, nhlfe->nexthop, rta,
1232 rtnh, rtmsg, src);
1233 }
1234
1235
1236 /* Log debug information for netlink_route_multipath
1237 * if debug logging is enabled.
1238 *
1239 * @param cmd: Netlink command which is to be processed
1240 * @param p: Prefix for which the change is due
1241 * @param nexthop: Nexthop which is currently processed
1242 * @param routedesc: Semantic annotation for nexthop
1243 * (recursive, multipath, etc.)
1244 * @param family: Address family which the change concerns
1245 */
1246 static void _netlink_route_debug(int cmd, struct prefix *p,
1247 struct nexthop *nexthop, const char *routedesc,
1248 int family, struct zebra_vrf *zvrf)
1249 {
1250 if (IS_ZEBRA_DEBUG_KERNEL) {
1251 char buf[PREFIX_STRLEN];
1252 zlog_debug(
1253 "netlink_route_multipath() (%s): %s %s vrf %u type %s",
1254 routedesc, nl_msg_type_to_str(cmd),
1255 prefix2str(p, buf, sizeof(buf)), zvrf_id(zvrf),
1256 (nexthop) ? nexthop_type_to_str(nexthop->type) : "UNK");
1257 }
1258 }
1259
1260 static void _netlink_mpls_debug(int cmd, u_int32_t label, const char *routedesc)
1261 {
1262 if (IS_ZEBRA_DEBUG_KERNEL)
1263 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1264 nl_msg_type_to_str(cmd), label);
1265 }
1266
1267 static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
1268 int llalen)
1269 {
1270 struct {
1271 struct nlmsghdr n;
1272 struct ndmsg ndm;
1273 char buf[256];
1274 } req;
1275
1276 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1277
1278 memset(&req.n, 0, sizeof(req.n));
1279 memset(&req.ndm, 0, sizeof(req.ndm));
1280
1281 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1282 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1283 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1284 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1285
1286 req.ndm.ndm_family = AF_INET;
1287 req.ndm.ndm_state = NUD_PERMANENT;
1288 req.ndm.ndm_ifindex = ifindex;
1289 req.ndm.ndm_type = RTN_UNICAST;
1290
1291 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1292 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1293
1294 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1295 0);
1296 }
1297
1298 /* Routing table change via netlink interface. */
1299 /* Update flag indicates whether this is a "replace" or not. */
1300 static int netlink_route_multipath(int cmd, struct prefix *p,
1301 struct prefix *src_p, struct route_entry *re,
1302 int update)
1303 {
1304 int bytelen;
1305 struct sockaddr_nl snl;
1306 struct nexthop *nexthop = NULL;
1307 unsigned int nexthop_num;
1308 int discard = 0;
1309 int family = PREFIX_FAMILY(p);
1310 const char *routedesc;
1311 int setsrc = 0;
1312 union g_addr src;
1313
1314 struct {
1315 struct nlmsghdr n;
1316 struct rtmsg r;
1317 char buf[NL_PKT_BUF_SIZE];
1318 } req;
1319
1320 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1321 struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
1322
1323 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
1324
1325 bytelen = (family == AF_INET ? 4 : 16);
1326
1327 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1328 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1329 if ((cmd == RTM_NEWROUTE) && update)
1330 req.n.nlmsg_flags |= NLM_F_REPLACE;
1331 req.n.nlmsg_type = cmd;
1332 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1333
1334 req.r.rtm_family = family;
1335 req.r.rtm_dst_len = p->prefixlen;
1336 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1337 req.r.rtm_protocol = zebra2proto(re->type);
1338 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1339 req.r.rtm_type = RTN_UNICAST;
1340
1341 addattr_l(&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
1342 if (src_p)
1343 addattr_l(&req.n, sizeof req, RTA_SRC, &src_p->u.prefix,
1344 bytelen);
1345
1346 /* Metric. */
1347 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1348 * used
1349 * either by the kernel or by zebra. Its purely for calculating best
1350 * path(s)
1351 * by the routing protocol and for communicating with protocol peers.
1352 */
1353 addattr32(&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
1354 #if defined(SUPPORT_REALMS)
1355 if (re->tag > 0 && re->tag <= 255)
1356 addattr32(&req.n, sizeof req, RTA_FLOW, re->tag);
1357 #endif
1358 /* Table corresponding to this route. */
1359 if (re->table < 256)
1360 req.r.rtm_table = re->table;
1361 else {
1362 req.r.rtm_table = RT_TABLE_UNSPEC;
1363 addattr32(&req.n, sizeof req, RTA_TABLE, re->table);
1364 }
1365
1366 if (discard)
1367 goto skip;
1368
1369 if (re->mtu || re->nexthop_mtu) {
1370 char buf[NL_PKT_BUF_SIZE];
1371 struct rtattr *rta = (void *)buf;
1372 u_int32_t mtu = re->mtu;
1373 if (!mtu || (re->nexthop_mtu && re->nexthop_mtu < mtu))
1374 mtu = re->nexthop_mtu;
1375 rta->rta_type = RTA_METRICS;
1376 rta->rta_len = RTA_LENGTH(0);
1377 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
1378 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1379 RTA_PAYLOAD(rta));
1380 }
1381
1382 /* Count overall nexthops so we can decide whether to use singlepath
1383 * or multipath case. */
1384 nexthop_num = 0;
1385 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1386 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1387 continue;
1388 if (cmd == RTM_NEWROUTE
1389 && !NEXTHOP_IS_ACTIVE(nexthop->flags))
1390 continue;
1391 if (cmd == RTM_DELROUTE
1392 && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
1393 continue;
1394
1395 nexthop_num++;
1396 }
1397
1398 /* Singlepath case. */
1399 if (nexthop_num == 1 || multipath_num == 1) {
1400 nexthop_num = 0;
1401 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1402 /*
1403 * So we want to cover 2 types of blackhole
1404 * routes here:
1405 * 1) A normal blackhole route( ala from a static
1406 * install.
1407 * 2) A recursively resolved blackhole route
1408 */
1409 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1410 switch (nexthop->bh_type) {
1411 case BLACKHOLE_ADMINPROHIB:
1412 req.r.rtm_type = RTN_PROHIBIT;
1413 break;
1414 case BLACKHOLE_REJECT:
1415 req.r.rtm_type = RTN_UNREACHABLE;
1416 break;
1417 default:
1418 req.r.rtm_type = RTN_BLACKHOLE;
1419 break;
1420 }
1421 goto skip;
1422 }
1423 if (CHECK_FLAG(nexthop->flags,
1424 NEXTHOP_FLAG_RECURSIVE)) {
1425 if (!setsrc) {
1426 if (family == AF_INET) {
1427 if (nexthop->rmap_src.ipv4
1428 .s_addr
1429 != 0) {
1430 src.ipv4 =
1431 nexthop->rmap_src
1432 .ipv4;
1433 setsrc = 1;
1434 } else if (nexthop->src.ipv4
1435 .s_addr
1436 != 0) {
1437 src.ipv4 =
1438 nexthop->src
1439 .ipv4;
1440 setsrc = 1;
1441 }
1442 } else if (family == AF_INET6) {
1443 if (!IN6_IS_ADDR_UNSPECIFIED(
1444 &nexthop->rmap_src
1445 .ipv6)) {
1446 src.ipv6 =
1447 nexthop->rmap_src
1448 .ipv6;
1449 setsrc = 1;
1450 } else if (
1451 !IN6_IS_ADDR_UNSPECIFIED(
1452 &nexthop->src
1453 .ipv6)) {
1454 src.ipv6 =
1455 nexthop->src
1456 .ipv6;
1457 setsrc = 1;
1458 }
1459 }
1460 }
1461 continue;
1462 }
1463
1464 if ((cmd == RTM_NEWROUTE
1465 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1466 || (cmd == RTM_DELROUTE
1467 && CHECK_FLAG(nexthop->flags,
1468 NEXTHOP_FLAG_FIB))) {
1469 routedesc = nexthop->rparent
1470 ? "recursive, single-path"
1471 : "single-path";
1472
1473 _netlink_route_debug(cmd, p, nexthop, routedesc,
1474 family, zvrf);
1475 _netlink_route_build_singlepath(
1476 routedesc, bytelen, nexthop, &req.n,
1477 &req.r, sizeof req, cmd);
1478 nexthop_num++;
1479 break;
1480 }
1481 }
1482 if (setsrc && (cmd == RTM_NEWROUTE)) {
1483 if (family == AF_INET)
1484 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1485 &src.ipv4, bytelen);
1486 else if (family == AF_INET6)
1487 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1488 &src.ipv6, bytelen);
1489 }
1490 } else {
1491 char buf[NL_PKT_BUF_SIZE];
1492 struct rtattr *rta = (void *)buf;
1493 struct rtnexthop *rtnh;
1494 union g_addr *src1 = NULL;
1495
1496 rta->rta_type = RTA_MULTIPATH;
1497 rta->rta_len = RTA_LENGTH(0);
1498 rtnh = RTA_DATA(rta);
1499
1500 nexthop_num = 0;
1501 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1502 if (nexthop_num >= multipath_num)
1503 break;
1504
1505 if (CHECK_FLAG(nexthop->flags,
1506 NEXTHOP_FLAG_RECURSIVE)) {
1507 /* This only works for IPv4 now */
1508 if (!setsrc) {
1509 if (family == AF_INET) {
1510 if (nexthop->rmap_src.ipv4
1511 .s_addr
1512 != 0) {
1513 src.ipv4 =
1514 nexthop->rmap_src
1515 .ipv4;
1516 setsrc = 1;
1517 } else if (nexthop->src.ipv4
1518 .s_addr
1519 != 0) {
1520 src.ipv4 =
1521 nexthop->src
1522 .ipv4;
1523 setsrc = 1;
1524 }
1525 } else if (family == AF_INET6) {
1526 if (!IN6_IS_ADDR_UNSPECIFIED(
1527 &nexthop->rmap_src
1528 .ipv6)) {
1529 src.ipv6 =
1530 nexthop->rmap_src
1531 .ipv6;
1532 setsrc = 1;
1533 } else if (
1534 !IN6_IS_ADDR_UNSPECIFIED(
1535 &nexthop->src
1536 .ipv6)) {
1537 src.ipv6 =
1538 nexthop->src
1539 .ipv6;
1540 setsrc = 1;
1541 }
1542 }
1543 }
1544 continue;
1545 }
1546
1547 if ((cmd == RTM_NEWROUTE
1548 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1549 || (cmd == RTM_DELROUTE
1550 && CHECK_FLAG(nexthop->flags,
1551 NEXTHOP_FLAG_FIB))) {
1552 routedesc = nexthop->rparent
1553 ? "recursive, multipath"
1554 : "multipath";
1555 nexthop_num++;
1556
1557 _netlink_route_debug(cmd, p, nexthop, routedesc,
1558 family, zvrf);
1559 _netlink_route_build_multipath(
1560 routedesc, bytelen, nexthop, rta, rtnh,
1561 &req.r, &src1);
1562 rtnh = RTNH_NEXT(rtnh);
1563
1564 if (!setsrc && src1) {
1565 if (family == AF_INET)
1566 src.ipv4 = src1->ipv4;
1567 else if (family == AF_INET6)
1568 src.ipv6 = src1->ipv6;
1569
1570 setsrc = 1;
1571 }
1572 }
1573 }
1574 if (setsrc && (cmd == RTM_NEWROUTE)) {
1575 if (family == AF_INET)
1576 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1577 &src.ipv4, bytelen);
1578 else if (family == AF_INET6)
1579 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1580 &src.ipv6, bytelen);
1581 if (IS_ZEBRA_DEBUG_KERNEL)
1582 zlog_debug("Setting source");
1583 }
1584
1585 if (rta->rta_len > RTA_LENGTH(0))
1586 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1587 RTA_DATA(rta), RTA_PAYLOAD(rta));
1588 }
1589
1590 /* If there is no useful nexthop then return. */
1591 if (nexthop_num == 0) {
1592 if (IS_ZEBRA_DEBUG_KERNEL)
1593 zlog_debug(
1594 "netlink_route_multipath(): No useful nexthop.");
1595 return 0;
1596 }
1597
1598 skip:
1599
1600 /* Destination netlink address. */
1601 memset(&snl, 0, sizeof snl);
1602 snl.nl_family = AF_NETLINK;
1603
1604 /* Talk to netlink socket. */
1605 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1606 0);
1607 }
1608
1609 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
1610 {
1611 int suc = 0;
1612 struct mcast_route_data *mr = (struct mcast_route_data *)in;
1613 struct {
1614 struct nlmsghdr n;
1615 struct ndmsg ndm;
1616 char buf[256];
1617 } req;
1618
1619 mroute = mr;
1620 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1621
1622 memset(&req.n, 0, sizeof(req.n));
1623 memset(&req.ndm, 0, sizeof(req.ndm));
1624
1625 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1626 req.n.nlmsg_flags = NLM_F_REQUEST;
1627 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1628
1629 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1630 req.n.nlmsg_type = RTM_GETROUTE;
1631
1632 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1633 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1634 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1635 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
1636 addattr_l(&req.n, sizeof(req), RTA_TABLE, &zvrf->table_id, 4);
1637
1638 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1639 &zns->netlink_cmd, zns, 0);
1640
1641 mroute = NULL;
1642 return suc;
1643 }
1644
1645 void kernel_route_rib(struct prefix *p, struct prefix *src_p,
1646 struct route_entry *old, struct route_entry *new)
1647 {
1648 int ret = 0;
1649
1650 assert(old || new);
1651
1652 if (new) {
1653 if (p->family == AF_INET)
1654 ret = netlink_route_multipath(RTM_NEWROUTE, p, src_p,
1655 new, (old) ? 1 : 0);
1656 else {
1657 /*
1658 * So v6 route replace semantics are not in
1659 * the kernel at this point as I understand it.
1660 * So let's do a delete than an add.
1661 * In the future once v6 route replace semantics
1662 * are in we can figure out what to do here to
1663 * allow working with old and new kernels.
1664 *
1665 * I'm also intentionally ignoring the failure case
1666 * of the route delete. If that happens yeah we're
1667 * screwed.
1668 */
1669 if (old)
1670 netlink_route_multipath(RTM_DELROUTE, p,
1671 src_p, old, 0);
1672 ret = netlink_route_multipath(RTM_NEWROUTE, p,
1673 src_p, new, 0);
1674 }
1675 kernel_route_rib_pass_fail(p, new,
1676 (!ret) ?
1677 SOUTHBOUND_INSTALL_SUCCESS :
1678 SOUTHBOUND_INSTALL_FAILURE);
1679 return;
1680 }
1681
1682 if (old) {
1683 ret = netlink_route_multipath(RTM_DELROUTE, p, src_p, old, 0);
1684
1685 kernel_route_rib_pass_fail(p, old,
1686 (!ret) ?
1687 SOUTHBOUND_DELETE_SUCCESS :
1688 SOUTHBOUND_DELETE_FAILURE);
1689 }
1690 }
1691
1692 int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
1693 int llalen)
1694 {
1695 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
1696 addr, lla, llalen);
1697 }
1698
1699 /*
1700 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
1701 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
1702 */
1703 static int netlink_vxlan_flood_list_update(struct interface *ifp,
1704 struct in_addr *vtep_ip, int cmd)
1705 {
1706 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1707 struct {
1708 struct nlmsghdr n;
1709 struct ndmsg ndm;
1710 char buf[256];
1711 } req;
1712 u_char dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1713
1714 memset(&req.n, 0, sizeof(req.n));
1715 memset(&req.ndm, 0, sizeof(req.ndm));
1716
1717 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1718 req.n.nlmsg_flags = NLM_F_REQUEST;
1719 if (cmd == RTM_NEWNEIGH)
1720 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
1721 req.n.nlmsg_type = cmd;
1722 req.ndm.ndm_family = PF_BRIDGE;
1723 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
1724 req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
1725
1726
1727 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
1728 req.ndm.ndm_ifindex = ifp->ifindex;
1729 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip->s_addr, 4);
1730
1731 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1732 0);
1733 }
1734
1735 /*
1736 * Add remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1737 * adding
1738 * a "flood" MAC FDB entry.
1739 */
1740 int kernel_add_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1741 {
1742 if (IS_ZEBRA_DEBUG_VXLAN)
1743 zlog_debug("Install %s into flood list for VNI %u intf %s(%u)",
1744 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1745
1746 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_NEWNEIGH);
1747 }
1748
1749 /*
1750 * Remove remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1751 * deleting the "flood" MAC FDB entry.
1752 */
1753 int kernel_del_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1754 {
1755 if (IS_ZEBRA_DEBUG_VXLAN)
1756 zlog_debug(
1757 "Uninstall %s from flood list for VNI %u intf %s(%u)",
1758 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1759
1760 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_DELNEIGH);
1761 }
1762
/*
 * Fallback for headers that do not provide NDA_RTA: given a pointer to a
 * struct ndmsg, yield the first rtattr, which starts at the aligned end of
 * that header.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
1767
1768 static int netlink_macfdb_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
1769 int len)
1770 {
1771 struct ndmsg *ndm;
1772 struct interface *ifp;
1773 struct zebra_if *zif;
1774 struct rtattr *tb[NDA_MAX + 1];
1775 struct interface *br_if;
1776 struct ethaddr mac;
1777 vlanid_t vid = 0;
1778 struct prefix vtep_ip;
1779 int vid_present = 0, dst_present = 0;
1780 char buf[ETHER_ADDR_STRLEN];
1781 char vid_buf[20];
1782 char dst_buf[30];
1783 u_char sticky = 0;
1784
1785 ndm = NLMSG_DATA(h);
1786
1787 /* We only process macfdb notifications if EVPN is enabled */
1788 if (!is_evpn_enabled())
1789 return 0;
1790
1791 /* The interface should exist. */
1792 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
1793 ndm->ndm_ifindex);
1794 if (!ifp || !ifp->info)
1795 return 0;
1796
1797 /* The interface should be something we're interested in. */
1798 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
1799 return 0;
1800
1801 /* Drop "permanent" entries. */
1802 if (ndm->ndm_state & NUD_PERMANENT)
1803 return 0;
1804
1805 zif = (struct zebra_if *)ifp->info;
1806 if ((br_if = zif->brslave_info.br_if) == NULL) {
1807 zlog_warn("%s family %s IF %s(%u) brIF %u - no bridge master",
1808 nl_msg_type_to_str(h->nlmsg_type),
1809 nl_family_to_str(ndm->ndm_family), ifp->name,
1810 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1811 return 0;
1812 }
1813
1814 /* Parse attributes and extract fields of interest. */
1815 memset(tb, 0, sizeof tb);
1816 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
1817
1818 if (!tb[NDA_LLADDR]) {
1819 zlog_warn("%s family %s IF %s(%u) brIF %u - no LLADDR",
1820 nl_msg_type_to_str(h->nlmsg_type),
1821 nl_family_to_str(ndm->ndm_family), ifp->name,
1822 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1823 return 0;
1824 }
1825
1826 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
1827 zlog_warn(
1828 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
1829 nl_msg_type_to_str(h->nlmsg_type),
1830 nl_family_to_str(ndm->ndm_family), ifp->name,
1831 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex,
1832 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
1833 return 0;
1834 }
1835
1836 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
1837
1838 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
1839 vid_present = 1;
1840 vid = *(u_int16_t *)RTA_DATA(tb[NDA_VLAN]);
1841 sprintf(vid_buf, " VLAN %u", vid);
1842 }
1843
1844 if (tb[NDA_DST]) {
1845 /* TODO: Only IPv4 supported now. */
1846 dst_present = 1;
1847 vtep_ip.family = AF_INET;
1848 vtep_ip.prefixlen = IPV4_MAX_BITLEN;
1849 memcpy(&(vtep_ip.u.prefix4.s_addr), RTA_DATA(tb[NDA_DST]),
1850 IPV4_MAX_BYTELEN);
1851 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip.u.prefix4));
1852 }
1853
1854 sticky = (ndm->ndm_state & NUD_NOARP) ? 1 : 0;
1855
1856 if (IS_ZEBRA_DEBUG_KERNEL)
1857 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
1858 nl_msg_type_to_str(h->nlmsg_type),
1859 nl_family_to_str(ndm->ndm_family), ifp->name,
1860 ndm->ndm_ifindex, vid_present ? vid_buf : "",
1861 sticky ? "sticky " : "",
1862 prefix_mac2str(&mac, buf, sizeof(buf)),
1863 dst_present ? dst_buf : "");
1864
1865 if (filter_vlan && vid != filter_vlan)
1866 return 0;
1867
1868 /* If add or update, do accordingly if learnt on a "local" interface; if
1869 * the notification is over VxLAN, this has to be related to
1870 * multi-homing,
1871 * so perform an implicit delete of any local entry (if it exists).
1872 */
1873 if (h->nlmsg_type == RTM_NEWNEIGH) {
1874 /* Drop "permanent" entries. */
1875 if (ndm->ndm_state & NUD_PERMANENT)
1876 return 0;
1877
1878 if (IS_ZEBRA_IF_VXLAN(ifp))
1879 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
1880 vid);
1881
1882 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
1883 sticky);
1884 }
1885
1886 /* This is a delete notification.
1887 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
1888 * 2. For a MAC over "local" interface, delete the mac
1889 * Note: We will get notifications from both bridge driver and VxLAN
1890 * driver.
1891 * Ignore the notification from VxLan driver as it is also generated
1892 * when mac moves from remote to local.
1893 */
1894 if (dst_present)
1895 return 0;
1896
1897 if (IS_ZEBRA_IF_VXLAN(ifp))
1898 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
1899 vid);
1900
1901 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
1902 }
1903
1904 static int netlink_macfdb_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
1905 ns_id_t ns_id, int startup)
1906 {
1907 int len;
1908 struct ndmsg *ndm;
1909
1910 if (h->nlmsg_type != RTM_NEWNEIGH)
1911 return 0;
1912
1913 /* Length validity. */
1914 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
1915 if (len < 0)
1916 return -1;
1917
1918 /* We are interested only in AF_BRIDGE notifications. */
1919 ndm = NLMSG_DATA(h);
1920 if (ndm->ndm_family != AF_BRIDGE)
1921 return 0;
1922
1923 return netlink_macfdb_change(snl, h, len);
1924 }
1925
1926 /* Request for MAC FDB information from the kernel */
1927 static int netlink_request_macs(struct zebra_ns *zns, int family, int type,
1928 ifindex_t master_ifindex)
1929 {
1930 struct {
1931 struct nlmsghdr n;
1932 struct ifinfomsg ifm;
1933 char buf[256];
1934 } req;
1935
1936 /* Form the request, specifying filter (rtattr) if needed. */
1937 memset(&req, 0, sizeof(req));
1938 req.n.nlmsg_type = type;
1939 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1940 req.ifm.ifi_family = family;
1941 if (master_ifindex)
1942 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
1943
1944 return netlink_request(&zns->netlink_cmd, &req.n);
1945 }
1946
1947 /*
1948 * MAC forwarding database read using netlink interface. This is invoked
1949 * at startup.
1950 */
1951 int netlink_macfdb_read(struct zebra_ns *zns)
1952 {
1953 int ret;
1954
1955 /* Get bridge FDB table. */
1956 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH, 0);
1957 if (ret < 0)
1958 return ret;
1959 /* We are reading entire table. */
1960 filter_vlan = 0;
1961 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1962 0, 1);
1963
1964 return ret;
1965 }
1966
1967 /*
1968 * MAC forwarding database read using netlink interface. This is for a
1969 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
1970 */
1971 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
1972 struct interface *br_if)
1973 {
1974 struct zebra_if *br_zif;
1975 struct zebra_if *zif;
1976 struct zebra_l2info_vxlan *vxl;
1977 int ret = 0;
1978
1979
1980 /* Save VLAN we're filtering on, if needed. */
1981 br_zif = (struct zebra_if *)br_if->info;
1982 zif = (struct zebra_if *)ifp->info;
1983 vxl = &zif->l2info.vxl;
1984 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
1985 filter_vlan = vxl->access_vlan;
1986
1987 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
1988 */
1989 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH,
1990 br_if->ifindex);
1991 if (ret < 0)
1992 return ret;
1993 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1994 0, 0);
1995
1996 /* Reset VLAN filter. */
1997 filter_vlan = 0;
1998 return ret;
1999 }
2000
2001 static int netlink_macfdb_update(struct interface *ifp, vlanid_t vid,
2002 struct ethaddr *mac, struct in_addr vtep_ip,
2003 int local, int cmd, u_char sticky)
2004 {
2005 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2006 struct {
2007 struct nlmsghdr n;
2008 struct ndmsg ndm;
2009 char buf[256];
2010 } req;
2011 int dst_alen;
2012 struct zebra_if *zif;
2013 struct interface *br_if;
2014 struct zebra_if *br_zif;
2015 char buf[ETHER_ADDR_STRLEN];
2016 int vid_present = 0, dst_present = 0;
2017 char vid_buf[20];
2018 char dst_buf[30];
2019
2020 zif = ifp->info;
2021 if ((br_if = zif->brslave_info.br_if) == NULL) {
2022 zlog_warn("MAC %s on IF %s(%u) - no mapping to bridge",
2023 (cmd == RTM_NEWNEIGH) ? "add" : "del", ifp->name,
2024 ifp->ifindex);
2025 return -1;
2026 }
2027
2028 memset(&req.n, 0, sizeof(req.n));
2029 memset(&req.ndm, 0, sizeof(req.ndm));
2030
2031 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2032 req.n.nlmsg_flags = NLM_F_REQUEST;
2033 if (cmd == RTM_NEWNEIGH)
2034 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2035 req.n.nlmsg_type = cmd;
2036 req.ndm.ndm_family = AF_BRIDGE;
2037 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2038 req.ndm.ndm_state = NUD_REACHABLE;
2039
2040 if (sticky)
2041 req.ndm.ndm_state |= NUD_NOARP;
2042 else
2043 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2044
2045 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2046 req.ndm.ndm_ifindex = ifp->ifindex;
2047 if (!local) {
2048 dst_alen = 4; // TODO: hardcoded
2049 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
2050 dst_present = 1;
2051 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip));
2052 }
2053 br_zif = (struct zebra_if *)br_if->info;
2054 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) {
2055 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2056 vid_present = 1;
2057 sprintf(vid_buf, " VLAN %u", vid);
2058 }
2059 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2060
2061 if (IS_ZEBRA_DEBUG_KERNEL)
2062 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2063 nl_msg_type_to_str(cmd),
2064 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2065 ifp->ifindex, vid_present ? vid_buf : "",
2066 sticky ? "sticky " : "",
2067 prefix_mac2str(mac, buf, sizeof(buf)),
2068 dst_present ? dst_buf : "");
2069
2070 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2071 0);
2072 }
2073
/*
 * Neighbor states that netlink_ipneigh_change() treats as usable: an
 * RTM_NEWNEIGH whose state has any of these bits is processed as an
 * add/update, anything else as a delete.
 */
#define NUD_VALID                                                              \
	(NUD_REACHABLE | NUD_STALE | NUD_DELAY | NUD_PROBE | NUD_NOARP         \
	 | NUD_PERMANENT)
2077
2078 static int netlink_ipneigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2079 int len)
2080 {
2081 struct ndmsg *ndm;
2082 struct interface *ifp;
2083 struct zebra_if *zif;
2084 struct rtattr *tb[NDA_MAX + 1];
2085 struct interface *link_if;
2086 struct ethaddr mac;
2087 struct ipaddr ip;
2088 char buf[ETHER_ADDR_STRLEN];
2089 char buf2[INET6_ADDRSTRLEN];
2090 int mac_present = 0;
2091 u_char ext_learned;
2092
2093 ndm = NLMSG_DATA(h);
2094
2095 /* We only process neigh notifications if EVPN is enabled */
2096 if (!is_evpn_enabled())
2097 return 0;
2098
2099 /* The interface should exist. */
2100 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2101 ndm->ndm_ifindex);
2102 if (!ifp || !ifp->info)
2103 return 0;
2104
2105 /* Drop "permanent" entries. */
2106 if (ndm->ndm_state & NUD_PERMANENT)
2107 return 0;
2108
2109 zif = (struct zebra_if *)ifp->info;
2110 /* The neighbor is present on an SVI. From this, we locate the
2111 * underlying
2112 * bridge because we're only interested in neighbors on a VxLAN bridge.
2113 * The bridge is located based on the nature of the SVI:
2114 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2115 * interface
2116 * and is linked to the bridge
2117 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2118 * inteface
2119 * itself
2120 */
2121 if (IS_ZEBRA_IF_VLAN(ifp)) {
2122 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2123 zif->link_ifindex);
2124 if (!link_if)
2125 return 0;
2126 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
2127 link_if = ifp;
2128 else
2129 return 0;
2130
2131 /* Parse attributes and extract fields of interest. */
2132 memset(tb, 0, sizeof tb);
2133 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2134
2135 if (!tb[NDA_DST]) {
2136 zlog_warn("%s family %s IF %s(%u) - no DST",
2137 nl_msg_type_to_str(h->nlmsg_type),
2138 nl_family_to_str(ndm->ndm_family), ifp->name,
2139 ndm->ndm_ifindex);
2140 return 0;
2141 }
2142 memset(&mac, 0, sizeof(struct ethaddr));
2143 memset(&ip, 0, sizeof(struct ipaddr));
2144 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
2145 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
2146
2147 if (h->nlmsg_type == RTM_NEWNEIGH) {
2148 if (tb[NDA_LLADDR]) {
2149 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
2150 zlog_warn(
2151 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
2152 nl_msg_type_to_str(h->nlmsg_type),
2153 nl_family_to_str(ndm->ndm_family),
2154 ifp->name, ndm->ndm_ifindex,
2155 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
2156 return 0;
2157 }
2158
2159 mac_present = 1;
2160 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
2161 }
2162
2163 ext_learned = (ndm->ndm_flags & NTF_EXT_LEARNED) ? 1 : 0;
2164
2165 if (IS_ZEBRA_DEBUG_KERNEL)
2166 zlog_debug(
2167 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2168 nl_msg_type_to_str(h->nlmsg_type),
2169 nl_family_to_str(ndm->ndm_family), ifp->name,
2170 ndm->ndm_ifindex,
2171 ipaddr2str(&ip, buf2, sizeof(buf2)),
2172 mac_present
2173 ? prefix_mac2str(&mac, buf, sizeof(buf))
2174 : "",
2175 ndm->ndm_state, ndm->ndm_flags);
2176
2177 /* If the neighbor state is valid for use, process as an add or
2178 * update
2179 * else process as a delete. Note that the delete handling may
2180 * result
2181 * in re-adding the neighbor if it is a valid "remote" neighbor.
2182 */
2183 if (ndm->ndm_state & NUD_VALID)
2184 return zebra_vxlan_local_neigh_add_update(
2185 ifp, link_if, &ip, &mac, ndm->ndm_state,
2186 ext_learned);
2187
2188 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2189 }
2190
2191 if (IS_ZEBRA_DEBUG_KERNEL)
2192 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2193 nl_msg_type_to_str(h->nlmsg_type),
2194 nl_family_to_str(ndm->ndm_family), ifp->name,
2195 ndm->ndm_ifindex,
2196 ipaddr2str(&ip, buf2, sizeof(buf2)));
2197
2198 /* Process the delete - it may result in re-adding the neighbor if it is
2199 * a valid "remote" neighbor.
2200 */
2201 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2202 }
2203
2204 static int netlink_neigh_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
2205 ns_id_t ns_id, int startup)
2206 {
2207 int len;
2208 struct ndmsg *ndm;
2209
2210 if (h->nlmsg_type != RTM_NEWNEIGH)
2211 return 0;
2212
2213 /* Length validity. */
2214 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2215 if (len < 0)
2216 return -1;
2217
2218 /* We are interested only in AF_INET or AF_INET6 notifications. */
2219 ndm = NLMSG_DATA(h);
2220 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
2221 return 0;
2222
2223 return netlink_neigh_change(snl, h, len);
2224 }
2225
2226 /* Request for IP neighbor information from the kernel */
2227 static int netlink_request_neigh(struct zebra_ns *zns, int family, int type,
2228 ifindex_t ifindex)
2229 {
2230 struct {
2231 struct nlmsghdr n;
2232 struct ndmsg ndm;
2233 char buf[256];
2234 } req;
2235
2236 /* Form the request, specifying filter (rtattr) if needed. */
2237 memset(&req, 0, sizeof(req));
2238 req.n.nlmsg_type = type;
2239 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2240 req.ndm.ndm_family = family;
2241 if (ifindex)
2242 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
2243
2244 return netlink_request(&zns->netlink_cmd, &req.n);
2245 }
2246
2247 /*
2248 * IP Neighbor table read using netlink interface. This is invoked
2249 * at startup.
2250 */
2251 int netlink_neigh_read(struct zebra_ns *zns)
2252 {
2253 int ret;
2254
2255 /* Get IP neighbor table. */
2256 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH, 0);
2257 if (ret < 0)
2258 return ret;
2259 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2260 1);
2261
2262 return ret;
2263 }
2264
2265 /*
2266 * IP Neighbor table read using netlink interface. This is for a specific
2267 * VLAN device.
2268 */
2269 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2270 {
2271 int ret = 0;
2272
2273 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH,
2274 vlan_if->ifindex);
2275 if (ret < 0)
2276 return ret;
2277 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2278 0);
2279
2280 return ret;
2281 }
2282
2283 int netlink_neigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2284 ns_id_t ns_id)
2285 {
2286 int len;
2287 struct ndmsg *ndm;
2288
2289 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
2290 return 0;
2291
2292 /* Length validity. */
2293 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2294 if (len < 0)
2295 return -1;
2296
2297 /* Is this a notification for the MAC FDB or IP neighbor table? */
2298 ndm = NLMSG_DATA(h);
2299 if (ndm->ndm_family == AF_BRIDGE)
2300 return netlink_macfdb_change(snl, h, len);
2301
2302 if (ndm->ndm_type != RTN_UNICAST)
2303 return 0;
2304
2305 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2306 return netlink_ipneigh_change(snl, h, len);
2307
2308 return 0;
2309 }
2310
2311 static int netlink_neigh_update2(struct interface *ifp, struct ipaddr *ip,
2312 struct ethaddr *mac, u_int32_t flags, int cmd)
2313 {
2314 struct {
2315 struct nlmsghdr n;
2316 struct ndmsg ndm;
2317 char buf[256];
2318 } req;
2319 int ipa_len;
2320
2321 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2322 char buf[INET6_ADDRSTRLEN];
2323 char buf2[ETHER_ADDR_STRLEN];
2324
2325 memset(&req.n, 0, sizeof(req.n));
2326 memset(&req.ndm, 0, sizeof(req.ndm));
2327
2328 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2329 req.n.nlmsg_flags = NLM_F_REQUEST;
2330 if (cmd == RTM_NEWNEIGH)
2331 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2332 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
2333 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
2334 req.ndm.ndm_state = flags;
2335 req.ndm.ndm_ifindex = ifp->ifindex;
2336 req.ndm.ndm_type = RTN_UNICAST;
2337 req.ndm.ndm_flags = NTF_EXT_LEARNED;
2338
2339
2340 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2341 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2342 if (mac)
2343 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2344
2345 if (IS_ZEBRA_DEBUG_KERNEL)
2346 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s",
2347 nl_msg_type_to_str(cmd),
2348 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2349 ifp->ifindex, ipaddr2str(ip, buf, sizeof(buf)),
2350 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
2351 : "null");
2352
2353 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2354 0);
2355 }
2356
2357 int kernel_add_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2358 struct in_addr vtep_ip, u_char sticky)
2359 {
2360 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, 0, RTM_NEWNEIGH,
2361 sticky);
2362 }
2363
2364 int kernel_del_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2365 struct in_addr vtep_ip, int local)
2366 {
2367 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, local,
2368 RTM_DELNEIGH, 0);
2369 }
2370
2371 int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
2372 struct ethaddr *mac)
2373 {
2374 return netlink_neigh_update2(ifp, ip, mac, NUD_REACHABLE, RTM_NEWNEIGH);
2375 }
2376
2377 int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip)
2378 {
2379 return netlink_neigh_update2(ifp, ip, NULL, 0, RTM_DELNEIGH);
2380 }
2381
2382 /*
2383 * MPLS label forwarding table change via netlink interface.
2384 */
2385 int netlink_mpls_multipath(int cmd, zebra_lsp_t *lsp)
2386 {
2387 mpls_lse_t lse;
2388 zebra_nhlfe_t *nhlfe;
2389 struct nexthop *nexthop = NULL;
2390 unsigned int nexthop_num;
2391 const char *routedesc;
2392 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2393 int route_type;
2394
2395 struct {
2396 struct nlmsghdr n;
2397 struct rtmsg r;
2398 char buf[NL_PKT_BUF_SIZE];
2399 } req;
2400
2401 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
2402
2403 /*
2404 * Count # nexthops so we can decide whether to use singlepath
2405 * or multipath case.
2406 */
2407 nexthop_num = 0;
2408 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2409 nexthop = nhlfe->nexthop;
2410 if (!nexthop)
2411 continue;
2412 if (cmd == RTM_NEWROUTE) {
2413 /* Count all selected NHLFEs */
2414 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2415 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2416 nexthop_num++;
2417 } else /* DEL */
2418 {
2419 /* Count all installed NHLFEs */
2420 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
2421 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
2422 nexthop_num++;
2423 }
2424 }
2425
2426 if ((nexthop_num == 0) || (!lsp->best_nhlfe && (cmd != RTM_DELROUTE)))
2427 return 0;
2428
2429 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2430 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2431 req.n.nlmsg_type = cmd;
2432 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2433
2434 req.r.rtm_family = AF_MPLS;
2435 req.r.rtm_table = RT_TABLE_MAIN;
2436 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
2437 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
2438 req.r.rtm_type = RTN_UNICAST;
2439
2440 if (cmd == RTM_NEWROUTE) {
2441 /* We do a replace to handle update. */
2442 req.n.nlmsg_flags |= NLM_F_REPLACE;
2443
2444 /* set the protocol value if installing */
2445 route_type = re_type_from_lsp_type(lsp->best_nhlfe->type);
2446 req.r.rtm_protocol = zebra2proto(route_type);
2447 }
2448
2449 /* Fill destination */
2450 lse = mpls_lse_encode(lsp->ile.in_label, 0, 0, 1);
2451 addattr_l(&req.n, sizeof req, RTA_DST, &lse, sizeof(mpls_lse_t));
2452
2453 /* Fill nexthops (paths) based on single-path or multipath. The paths
2454 * chosen depend on the operation.
2455 */
2456 if (nexthop_num == 1 || multipath_num == 1) {
2457 routedesc = "single-path";
2458 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2459
2460 nexthop_num = 0;
2461 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2462 nexthop = nhlfe->nexthop;
2463 if (!nexthop)
2464 continue;
2465
2466 if ((cmd == RTM_NEWROUTE
2467 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2468 && CHECK_FLAG(nexthop->flags,
2469 NEXTHOP_FLAG_ACTIVE)))
2470 || (cmd == RTM_DELROUTE
2471 && (CHECK_FLAG(nhlfe->flags,
2472 NHLFE_FLAG_INSTALLED)
2473 && CHECK_FLAG(nexthop->flags,
2474 NEXTHOP_FLAG_FIB)))) {
2475 /* Add the gateway */
2476 _netlink_mpls_build_singlepath(routedesc, nhlfe,
2477 &req.n, &req.r,
2478 sizeof req, cmd);
2479 nexthop_num++;
2480 break;
2481 }
2482 }
2483 } else /* Multipath case */
2484 {
2485 char buf[NL_PKT_BUF_SIZE];
2486 struct rtattr *rta = (void *)buf;
2487 struct rtnexthop *rtnh;
2488 union g_addr *src1 = NULL;
2489
2490 rta->rta_type = RTA_MULTIPATH;
2491 rta->rta_len = RTA_LENGTH(0);
2492 rtnh = RTA_DATA(rta);
2493
2494 routedesc = "multipath";
2495 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2496
2497 nexthop_num = 0;
2498 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2499 nexthop = nhlfe->nexthop;
2500 if (!nexthop)
2501 continue;
2502
2503 if (nexthop_num >= multipath_num)
2504 break;
2505
2506 if ((cmd == RTM_NEWROUTE
2507 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2508 && CHECK_FLAG(nexthop->flags,
2509 NEXTHOP_FLAG_ACTIVE)))
2510 || (cmd == RTM_DELROUTE
2511 && (CHECK_FLAG(nhlfe->flags,
2512 NHLFE_FLAG_INSTALLED)
2513 && CHECK_FLAG(nexthop->flags,
2514 NEXTHOP_FLAG_FIB)))) {
2515 nexthop_num++;
2516
2517 /* Build the multipath */
2518 _netlink_mpls_build_multipath(routedesc, nhlfe,
2519 rta, rtnh, &req.r,
2520 &src1);
2521 rtnh = RTNH_NEXT(rtnh);
2522 }
2523 }
2524
2525 /* Add the multipath */
2526 if (rta->rta_len > RTA_LENGTH(0))
2527 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
2528 RTA_DATA(rta), RTA_PAYLOAD(rta));
2529 }
2530
2531 /* Talk to netlink socket. */
2532 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2533 0);
2534 }
2535 #endif /* HAVE_NETLINK */