]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
Merge pull request #1317 from donaldsharp/babel_mem
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 #include <net/if_arp.h>
26
27 /* Hack for GNU libc version 2. */
28 #ifndef MSG_TRUNC
29 #define MSG_TRUNC 0x20
30 #endif /* MSG_TRUNC */
31
32 #include "linklist.h"
33 #include "if.h"
34 #include "log.h"
35 #include "prefix.h"
36 #include "connected.h"
37 #include "table.h"
38 #include "memory.h"
39 #include "zebra_memory.h"
40 #include "rib.h"
41 #include "thread.h"
42 #include "privs.h"
43 #include "nexthop.h"
44 #include "vrf.h"
45 #include "vty.h"
46 #include "mpls.h"
47 #include "vxlan.h"
48
49 #include "zebra/zserv.h"
50 #include "zebra/zebra_ns.h"
51 #include "zebra/zebra_vrf.h"
52 #include "zebra/rt.h"
53 #include "zebra/redistribute.h"
54 #include "zebra/interface.h"
55 #include "zebra/debug.h"
56 #include "zebra/rtadv.h"
57 #include "zebra/zebra_ptm.h"
58 #include "zebra/zebra_mpls.h"
59 #include "zebra/kernel_netlink.h"
60 #include "zebra/rt_netlink.h"
61 #include "zebra/zebra_mroute.h"
62 #include "zebra/zebra_vxlan.h"
63
64
/*
 * TODO - Temporary definitions, need to refine.
 *
 * Fallback values for constants used in this file.  Each one is only
 * defined here when nothing included above has already provided it.
 */

/* Address family */
#if !defined(AF_MPLS)
#define AF_MPLS 28
#endif

/* Route attributes */
#if !defined(RTA_VIA)
#define RTA_VIA 18
#endif

#if !defined(RTA_NEWDST)
#define RTA_NEWDST 19
#endif

#if !defined(RTA_ENCAP_TYPE)
#define RTA_ENCAP_TYPE 21
#endif

#if !defined(RTA_ENCAP)
#define RTA_ENCAP 22
#endif

#if !defined(RTA_EXPIRES)
#define RTA_EXPIRES 23
#endif

/* Lightweight tunnel / MPLS encapsulation */
#if !defined(LWTUNNEL_ENCAP_MPLS)
#define LWTUNNEL_ENCAP_MPLS 1
#endif

#if !defined(MPLS_IPTUNNEL_DST)
#define MPLS_IPTUNNEL_DST 1
#endif

/* Neighbor attributes and flags */
#if !defined(NDA_MASTER)
#define NDA_MASTER 9
#endif

#if !defined(NTF_MASTER)
#define NTF_MASTER 0x04
#endif

#if !defined(NTF_SELF)
#define NTF_SELF 0x02
#endif

#if !defined(NTF_EXT_LEARNED)
#define NTF_EXT_LEARNED 0x10
#endif

#if !defined(NDA_IFINDEX)
#define NDA_IFINDEX 8
#endif

#if !defined(NDA_VLAN)
#define NDA_VLAN 5
#endif
/* End of temporary definitions */
122
123 static vlanid_t filter_vlan = 0;
124
125 struct gw_family_t {
126 u_int16_t filler;
127 u_int16_t family;
128 union g_addr gate;
129 };
130
131 char ipv4_ll_buf[16] = "169.254.0.1";
132 struct in_addr ipv4_ll;
133
134 /*
135 * The ipv4_ll data structure is used for all 5549
136 * additions to the kernel. Let's figure out the
137 * correct value one time instead for every
138 * install/remove of a 5549 type route
139 */
140 void rt_netlink_init(void)
141 {
142 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
143 }
144
145 static inline int is_selfroute(int proto)
146 {
147 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
148 || (proto == RTPROT_STATIC) || (proto == RTPROT_ZEBRA)
149 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
150 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
151 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
152 || (proto == RTPROT_RIP)) {
153 return 1;
154 }
155
156 return 0;
157 }
158
159 static inline int zebra2proto(int proto)
160 {
161 switch (proto) {
162 case ZEBRA_ROUTE_BABEL:
163 proto = RTPROT_BABEL;
164 break;
165 case ZEBRA_ROUTE_BGP:
166 proto = RTPROT_BGP;
167 break;
168 case ZEBRA_ROUTE_OSPF:
169 case ZEBRA_ROUTE_OSPF6:
170 proto = RTPROT_OSPF;
171 break;
172 case ZEBRA_ROUTE_STATIC:
173 proto = RTPROT_STATIC;
174 break;
175 case ZEBRA_ROUTE_ISIS:
176 proto = RTPROT_ISIS;
177 break;
178 case ZEBRA_ROUTE_RIP:
179 proto = RTPROT_RIP;
180 break;
181 case ZEBRA_ROUTE_RIPNG:
182 proto = RTPROT_RIPNG;
183 break;
184 case ZEBRA_ROUTE_NHRP:
185 proto = RTPROT_NHRP;
186 break;
187 case ZEBRA_ROUTE_EIGRP:
188 proto = RTPROT_EIGRP;
189 break;
190 case ZEBRA_ROUTE_LDP:
191 proto = RTPROT_LDP;
192 break;
193 default:
194 proto = RTPROT_ZEBRA;
195 break;
196 }
197
198 return proto;
199 }
200
201 static inline int proto2zebra(int proto, int family)
202 {
203 switch (proto) {
204 case RTPROT_BABEL:
205 proto = ZEBRA_ROUTE_BABEL;
206 break;
207 case RTPROT_BGP:
208 proto = ZEBRA_ROUTE_BGP;
209 break;
210 case RTPROT_OSPF:
211 proto = (family == AFI_IP) ?
212 ZEBRA_ROUTE_OSPF : ZEBRA_ROUTE_OSPF6;
213 break;
214 case RTPROT_ISIS:
215 proto = ZEBRA_ROUTE_ISIS;
216 break;
217 case RTPROT_RIP:
218 proto = ZEBRA_ROUTE_RIP;
219 break;
220 case RTPROT_RIPNG:
221 proto = ZEBRA_ROUTE_RIPNG;
222 break;
223 case RTPROT_NHRP:
224 proto = ZEBRA_ROUTE_NHRP;
225 break;
226 case RTPROT_EIGRP:
227 proto = ZEBRA_ROUTE_EIGRP;
228 break;
229 case RTPROT_LDP:
230 proto = ZEBRA_ROUTE_LDP;
231 break;
232 case RTPROT_STATIC:
233 proto = ZEBRA_ROUTE_STATIC;
234 break;
235 default:
236 proto = ZEBRA_ROUTE_KERNEL;
237 break;
238 }
239 return proto;
240 }
241
242 /*
243 Pending: create an efficient table_id (in a tree/hash) based lookup)
244 */
245 static vrf_id_t vrf_lookup_by_table(u_int32_t table_id)
246 {
247 struct vrf *vrf;
248 struct zebra_vrf *zvrf;
249
250 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
251 if ((zvrf = vrf->info) == NULL || (zvrf->table_id != table_id))
252 continue;
253
254 return zvrf_id(zvrf);
255 }
256
257 return VRF_DEFAULT;
258 }
259
260 /* Looking up routing table by netlink interface. */
261 static int netlink_route_change_read_unicast(struct sockaddr_nl *snl,
262 struct nlmsghdr *h, ns_id_t ns_id,
263 int startup)
264 {
265 int len;
266 struct rtmsg *rtm;
267 struct rtattr *tb[RTA_MAX + 1];
268 u_char flags = 0;
269 struct prefix p;
270 struct prefix_ipv6 src_p = {};
271 vrf_id_t vrf_id = VRF_DEFAULT;
272
273 char anyaddr[16] = {0};
274
275 int proto = ZEBRA_ROUTE_KERNEL;
276 int index = 0;
277 int table;
278 int metric = 0;
279 u_int32_t mtu = 0;
280 uint8_t distance = 0;
281
282 void *dest = NULL;
283 void *gate = NULL;
284 void *prefsrc = NULL; /* IPv4 preferred source host address */
285 void *src = NULL; /* IPv6 srcdest source prefix */
286 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
287
288 rtm = NLMSG_DATA(h);
289
290 if (startup && h->nlmsg_type != RTM_NEWROUTE)
291 return 0;
292 switch (rtm->rtm_type) {
293 case RTN_UNICAST:
294 break;
295 case RTN_BLACKHOLE:
296 bh_type = BLACKHOLE_NULL;
297 break;
298 case RTN_UNREACHABLE:
299 bh_type = BLACKHOLE_REJECT;
300 break;
301 case RTN_PROHIBIT:
302 bh_type = BLACKHOLE_ADMINPROHIB;
303 break;
304 default:
305 return 0;
306 }
307
308 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
309 if (len < 0)
310 return -1;
311
312 memset(tb, 0, sizeof tb);
313 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
314
315 if (rtm->rtm_flags & RTM_F_CLONED)
316 return 0;
317 if (rtm->rtm_protocol == RTPROT_REDIRECT)
318 return 0;
319 if (rtm->rtm_protocol == RTPROT_KERNEL)
320 return 0;
321
322 if (!startup && is_selfroute(rtm->rtm_protocol)
323 && h->nlmsg_type == RTM_NEWROUTE)
324 return 0;
325
326 /* We don't care about change notifications for the MPLS table. */
327 /* TODO: Revisit this. */
328 if (rtm->rtm_family == AF_MPLS)
329 return 0;
330
331 /* Table corresponding to route. */
332 if (tb[RTA_TABLE])
333 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
334 else
335 table = rtm->rtm_table;
336
337 /* Map to VRF */
338 vrf_id = vrf_lookup_by_table(table);
339 if (vrf_id == VRF_DEFAULT) {
340 if (!is_zebra_valid_kernel_table(table)
341 && !is_zebra_main_routing_table(table))
342 return 0;
343 }
344
345 /* Route which inserted by Zebra. */
346 if (is_selfroute(rtm->rtm_protocol)) {
347 flags |= ZEBRA_FLAG_SELFROUTE;
348 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
349 }
350 if (tb[RTA_OIF])
351 index = *(int *)RTA_DATA(tb[RTA_OIF]);
352
353 if (tb[RTA_DST])
354 dest = RTA_DATA(tb[RTA_DST]);
355 else
356 dest = anyaddr;
357
358 if (tb[RTA_SRC])
359 src = RTA_DATA(tb[RTA_SRC]);
360 else
361 src = anyaddr;
362
363 if (tb[RTA_PREFSRC])
364 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
365
366 if (tb[RTA_GATEWAY])
367 gate = RTA_DATA(tb[RTA_GATEWAY]);
368
369 if (tb[RTA_PRIORITY])
370 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
371
372 if (tb[RTA_METRICS]) {
373 struct rtattr *mxrta[RTAX_MAX + 1];
374
375 memset(mxrta, 0, sizeof mxrta);
376 netlink_parse_rtattr(mxrta, RTAX_MAX,
377 RTA_DATA(tb[RTA_METRICS]),
378 RTA_PAYLOAD(tb[RTA_METRICS]));
379
380 if (mxrta[RTAX_MTU])
381 mtu = *(u_int32_t *)RTA_DATA(mxrta[RTAX_MTU]);
382 }
383
384 if (rtm->rtm_family == AF_INET) {
385 p.family = AF_INET;
386 memcpy(&p.u.prefix4, dest, 4);
387 p.prefixlen = rtm->rtm_dst_len;
388
389 src_p.prefixlen =
390 0; // Forces debug below to not display anything
391 } else if (rtm->rtm_family == AF_INET6) {
392 p.family = AF_INET6;
393 memcpy(&p.u.prefix6, dest, 16);
394 p.prefixlen = rtm->rtm_dst_len;
395
396 src_p.family = AF_INET6;
397 memcpy(&src_p.prefix, src, 16);
398 src_p.prefixlen = rtm->rtm_src_len;
399 }
400
401 if (rtm->rtm_src_len != 0) {
402 char buf[PREFIX_STRLEN];
403 zlog_warn(
404 "unsupported IPv[4|6] sourcedest route (dest %s vrf %u)",
405 prefix2str(&p, buf, sizeof(buf)), vrf_id);
406 return 0;
407 }
408
409 /*
410 * For ZEBRA_ROUTE_KERNEL types:
411 *
412 * The metric/priority of the route received from the kernel
413 * is a 32 bit number. We are going to interpret the high
414 * order byte as the Admin Distance and the low order 3 bytes
415 * as the metric.
416 *
417 * This will allow us to do two things:
418 * 1) Allow the creation of kernel routes that can be
419 * overridden by zebra.
420 * 2) Allow the old behavior for 'most' kernel route types
421 * if a user enters 'ip route ...' v4 routes get a metric
422 * of 0 and v6 routes get a metric of 1024. Both of these
423 * values will end up with a admin distance of 0, which
424 * will cause them to win for the purposes of zebra.
425 */
426 if (proto == ZEBRA_ROUTE_KERNEL) {
427 distance = (metric >> 24) & 0xFF;
428 metric = (metric & 0x00FFFFFF);
429 }
430
431 if (IS_ZEBRA_DEBUG_KERNEL) {
432 char buf[PREFIX_STRLEN];
433 char buf2[PREFIX_STRLEN];
434 zlog_debug(
435 "%s %s%s%s vrf %u metric: %d Admin Distance: %d", nl_msg_type_to_str(h->nlmsg_type),
436 prefix2str(&p, buf, sizeof(buf)),
437 src_p.prefixlen ? " from " : "",
438 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
439 : "",
440 vrf_id, metric, distance);
441 }
442
443 afi_t afi = AFI_IP;
444 if (rtm->rtm_family == AF_INET6)
445 afi = AFI_IP6;
446
447 if (h->nlmsg_type == RTM_NEWROUTE) {
448 if (!tb[RTA_MULTIPATH]) {
449 struct nexthop nh;
450 size_t sz = (afi == AFI_IP) ? 4 : 16;
451
452 memset(&nh, 0, sizeof(nh));
453
454 if (bh_type == BLACKHOLE_UNSPEC) {
455 if (index && !gate)
456 nh.type = NEXTHOP_TYPE_IFINDEX;
457 else if (index && gate)
458 nh.type = (afi == AFI_IP)
459 ? NEXTHOP_TYPE_IPV4_IFINDEX
460 : NEXTHOP_TYPE_IPV6_IFINDEX;
461 else if (!index && gate)
462 nh.type = (afi == AFI_IP)
463 ? NEXTHOP_TYPE_IPV4
464 : NEXTHOP_TYPE_IPV6;
465 else {
466 nh.type = NEXTHOP_TYPE_BLACKHOLE;
467 nh.bh_type = bh_type;
468 }
469 } else {
470 nh.type = NEXTHOP_TYPE_BLACKHOLE;
471 nh.bh_type = bh_type;
472 }
473 nh.ifindex = index;
474 if (prefsrc)
475 memcpy(&nh.src, prefsrc, sz);
476 if (gate)
477 memcpy(&nh.gate, gate, sz);
478
479 rib_add(afi, SAFI_UNICAST, vrf_id, proto,
480 0, flags, &p, NULL, &nh, table, metric, mtu, distance);
481 } else {
482 /* This is a multipath route */
483
484 struct route_entry *re;
485 struct rtnexthop *rtnh =
486 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
487
488 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
489
490 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
491 re->type = proto;
492 re->distance = distance;
493 re->flags = flags;
494 re->metric = metric;
495 re->mtu = mtu;
496 re->vrf_id = vrf_id;
497 re->table = table;
498 re->nexthop_num = 0;
499 re->uptime = time(NULL);
500
501 for (;;) {
502 if (len < (int)sizeof(*rtnh)
503 || rtnh->rtnh_len > len)
504 break;
505
506 index = rtnh->rtnh_ifindex;
507 gate = 0;
508 if (rtnh->rtnh_len > sizeof(*rtnh)) {
509 memset(tb, 0, sizeof(tb));
510 netlink_parse_rtattr(
511 tb, RTA_MAX, RTNH_DATA(rtnh),
512 rtnh->rtnh_len - sizeof(*rtnh));
513 if (tb[RTA_GATEWAY])
514 gate = RTA_DATA(
515 tb[RTA_GATEWAY]);
516 }
517
518 if (gate) {
519 if (rtm->rtm_family == AF_INET) {
520 if (index)
521 route_entry_nexthop_ipv4_ifindex_add(
522 re, gate,
523 prefsrc, index);
524 else
525 route_entry_nexthop_ipv4_add(
526 re, gate,
527 prefsrc);
528 } else if (rtm->rtm_family
529 == AF_INET6) {
530 if (index)
531 route_entry_nexthop_ipv6_ifindex_add(
532 re, gate,
533 index);
534 else
535 route_entry_nexthop_ipv6_add(
536 re, gate);
537 }
538 } else
539 route_entry_nexthop_ifindex_add(re,
540 index);
541
542 len -= NLMSG_ALIGN(rtnh->rtnh_len);
543 rtnh = RTNH_NEXT(rtnh);
544 }
545
546 zserv_nexthop_num_warn(__func__,
547 (const struct prefix *)&p,
548 re->nexthop_num);
549 if (re->nexthop_num == 0)
550 XFREE(MTYPE_RE, re);
551 else
552 rib_add_multipath(afi, SAFI_UNICAST, &p,
553 NULL, re);
554 }
555 } else {
556 if (!tb[RTA_MULTIPATH]) {
557 struct nexthop nh;
558 size_t sz = (afi == AFI_IP) ? 4 : 16;
559
560 memset(&nh, 0, sizeof(nh));
561 if (bh_type == BLACKHOLE_UNSPEC) {
562 if (index && !gate)
563 nh.type = NEXTHOP_TYPE_IFINDEX;
564 else if (index && gate)
565 nh.type =
566 (afi == AFI_IP)
567 ? NEXTHOP_TYPE_IPV4_IFINDEX
568 : NEXTHOP_TYPE_IPV6_IFINDEX;
569 else if (!index && gate)
570 nh.type = (afi == AFI_IP)
571 ? NEXTHOP_TYPE_IPV4
572 : NEXTHOP_TYPE_IPV6;
573 else {
574 nh.type = NEXTHOP_TYPE_BLACKHOLE;
575 nh.bh_type = BLACKHOLE_UNSPEC;
576 }
577 } else {
578 nh.type = NEXTHOP_TYPE_BLACKHOLE;
579 nh.bh_type = bh_type;
580 }
581 nh.ifindex = index;
582 if (gate)
583 memcpy(&nh.gate, gate, sz);
584 rib_delete(afi, SAFI_UNICAST, vrf_id,
585 proto, 0, flags, &p, NULL, &nh,
586 table, metric, true);
587 } else {
588 /* XXX: need to compare the entire list of nexthops
589 * here for NLM_F_APPEND stupidity */
590 rib_delete(afi, SAFI_UNICAST, vrf_id,
591 proto, 0, flags, &p, NULL, NULL,
592 table, metric, true);
593 }
594 }
595
596 return 0;
597 }
598
599 static struct mcast_route_data *mroute = NULL;
600
601 static int netlink_route_change_read_multicast(struct sockaddr_nl *snl,
602 struct nlmsghdr *h,
603 ns_id_t ns_id, int startup)
604 {
605 int len;
606 struct rtmsg *rtm;
607 struct rtattr *tb[RTA_MAX + 1];
608 struct mcast_route_data *m;
609 struct mcast_route_data mr;
610 int iif = 0;
611 int count;
612 int oif[256];
613 int oif_count = 0;
614 char sbuf[40];
615 char gbuf[40];
616 char oif_list[256] = "\0";
617 vrf_id_t vrf = ns_id;
618 int table;
619
620 if (mroute)
621 m = mroute;
622 else {
623 memset(&mr, 0, sizeof(mr));
624 m = &mr;
625 }
626
627 rtm = NLMSG_DATA(h);
628
629 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
630
631 memset(tb, 0, sizeof tb);
632 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
633
634 if (tb[RTA_TABLE])
635 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
636 else
637 table = rtm->rtm_table;
638
639 vrf = vrf_lookup_by_table(table);
640
641 if (tb[RTA_IIF])
642 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
643
644 if (tb[RTA_SRC])
645 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
646
647 if (tb[RTA_DST])
648 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
649
650 if ((RTA_EXPIRES <= RTA_MAX) && tb[RTA_EXPIRES])
651 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
652
653 if (tb[RTA_MULTIPATH]) {
654 struct rtnexthop *rtnh =
655 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
656
657 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
658 for (;;) {
659 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
660 break;
661
662 oif[oif_count] = rtnh->rtnh_ifindex;
663 oif_count++;
664
665 len -= NLMSG_ALIGN(rtnh->rtnh_len);
666 rtnh = RTNH_NEXT(rtnh);
667 }
668 }
669
670 if (IS_ZEBRA_DEBUG_KERNEL) {
671 struct interface *ifp;
672 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
673 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
674 for (count = 0; count < oif_count; count++) {
675 ifp = if_lookup_by_index(oif[count], vrf);
676 char temp[256];
677
678 sprintf(temp, "%s ", ifp->name);
679 strcat(oif_list, temp);
680 }
681 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vrf);
682 ifp = if_lookup_by_index(iif, vrf);
683 zlog_debug(
684 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s OIF: %s jiffies: %lld",
685 zvrf->vrf->name, vrf, nl_msg_type_to_str(h->nlmsg_type),
686 sbuf, gbuf, ifp->name, oif_list, m->lastused);
687 }
688 return 0;
689 }
690
691 int netlink_route_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
692 ns_id_t ns_id, int startup)
693 {
694 int len;
695 vrf_id_t vrf_id = ns_id;
696 struct rtmsg *rtm;
697
698 rtm = NLMSG_DATA(h);
699
700 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
701 /* If this is not route add/delete message print warning. */
702 zlog_warn("Kernel message: %d vrf %u\n", h->nlmsg_type, vrf_id);
703 return 0;
704 }
705
706 /* Connected route. */
707 if (IS_ZEBRA_DEBUG_KERNEL)
708 zlog_debug("%s %s %s proto %s vrf %u",
709 nl_msg_type_to_str(h->nlmsg_type),
710 nl_family_to_str(rtm->rtm_family),
711 nl_rttype_to_str(rtm->rtm_type),
712 nl_rtproto_to_str(rtm->rtm_protocol), vrf_id);
713
714 /* We don't care about change notifications for the MPLS table. */
715 /* TODO: Revisit this. */
716 if (rtm->rtm_family == AF_MPLS)
717 return 0;
718
719 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
720 if (len < 0)
721 return -1;
722
723 if (rtm->rtm_type == RTN_MULTICAST)
724 netlink_route_change_read_multicast(snl, h, ns_id, startup);
725 else
726 netlink_route_change_read_unicast(snl, h, ns_id, startup);
727 return 0;
728 }
729
730 /* Request for specific route information from the kernel */
731 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
732 {
733 struct {
734 struct nlmsghdr n;
735 struct rtmsg rtm;
736 } req;
737
738 /* Form the request, specifying filter (rtattr) if needed. */
739 memset(&req, 0, sizeof(req));
740 req.n.nlmsg_type = type;
741 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
742 req.rtm.rtm_family = family;
743
744 return netlink_request(&zns->netlink_cmd, &req.n);
745 }
746
747 /* Routing table read function using netlink interface. Only called
748 bootstrap time. */
749 int netlink_route_read(struct zebra_ns *zns)
750 {
751 int ret;
752
753 /* Get IPv4 routing table. */
754 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
755 if (ret < 0)
756 return ret;
757 ret = netlink_parse_info(netlink_route_change_read_unicast,
758 &zns->netlink_cmd, zns, 0, 1);
759 if (ret < 0)
760 return ret;
761
762 /* Get IPv6 routing table. */
763 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
764 if (ret < 0)
765 return ret;
766 ret = netlink_parse_info(netlink_route_change_read_unicast,
767 &zns->netlink_cmd, zns, 0, 1);
768 if (ret < 0)
769 return ret;
770
771 return 0;
772 }
773
774 static void _netlink_route_nl_add_gateway_info(u_char route_family,
775 u_char gw_family,
776 struct nlmsghdr *nlmsg,
777 size_t req_size, int bytelen,
778 struct nexthop *nexthop)
779 {
780 if (route_family == AF_MPLS) {
781 struct gw_family_t gw_fam;
782
783 gw_fam.family = gw_family;
784 if (gw_family == AF_INET)
785 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
786 else
787 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
788 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
789 bytelen + 2);
790 } else {
791 if (gw_family == AF_INET)
792 addattr_l(nlmsg, req_size, RTA_GATEWAY,
793 &nexthop->gate.ipv4, bytelen);
794 else
795 addattr_l(nlmsg, req_size, RTA_GATEWAY,
796 &nexthop->gate.ipv6, bytelen);
797 }
798 }
799
800 static void _netlink_route_rta_add_gateway_info(u_char route_family,
801 u_char gw_family,
802 struct rtattr *rta,
803 struct rtnexthop *rtnh,
804 size_t req_size, int bytelen,
805 struct nexthop *nexthop)
806 {
807 if (route_family == AF_MPLS) {
808 struct gw_family_t gw_fam;
809
810 gw_fam.family = gw_family;
811 if (gw_family == AF_INET)
812 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
813 else
814 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
815 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
816 bytelen + 2);
817 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
818 } else {
819 if (gw_family == AF_INET)
820 rta_addattr_l(rta, req_size, RTA_GATEWAY,
821 &nexthop->gate.ipv4, bytelen);
822 else
823 rta_addattr_l(rta, req_size, RTA_GATEWAY,
824 &nexthop->gate.ipv6, bytelen);
825 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
826 }
827 }
828
829 /* This function takes a nexthop as argument and adds
830 * the appropriate netlink attributes to an existing
831 * netlink message.
832 *
833 * @param routedesc: Human readable description of route type
834 * (direct/recursive, single-/multipath)
835 * @param bytelen: Length of addresses in bytes.
836 * @param nexthop: Nexthop information
837 * @param nlmsg: nlmsghdr structure to fill in.
838 * @param req_size: The size allocated for the message.
839 */
840 static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
841 struct nexthop *nexthop,
842 struct nlmsghdr *nlmsg,
843 struct rtmsg *rtmsg,
844 size_t req_size, int cmd)
845 {
846 struct nexthop_label *nh_label;
847 mpls_lse_t out_lse[MPLS_MAX_LABELS];
848 char label_buf[256];
849
850 /*
851 * label_buf is *only* currently used within debugging.
852 * As such when we assign it we are guarding it inside
853 * a debug test. If you want to change this make sure
854 * you fix this assumption
855 */
856 label_buf[0] = '\0';
857 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
858 * (in the case of LER)
859 */
860 nh_label = nexthop->nh_label;
861 if (rtmsg->rtm_family == AF_MPLS) {
862 assert(nh_label);
863 assert(nh_label->num_labels == 1);
864 }
865
866 if (nh_label && nh_label->num_labels) {
867 int i, num_labels = 0;
868 u_int32_t bos;
869 char label_buf1[20];
870
871 for (i = 0; i < nh_label->num_labels; i++) {
872 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
873 bos = ((i == (nh_label->num_labels - 1)) ? 1
874 : 0);
875 out_lse[i] = mpls_lse_encode(nh_label->label[i],
876 0, 0, bos);
877 if (IS_ZEBRA_DEBUG_KERNEL) {
878 if (!num_labels)
879 sprintf(label_buf, "label %u",
880 nh_label->label[i]);
881 else {
882 sprintf(label_buf1, "/%u",
883 nh_label->label[i]);
884 strlcat(label_buf, label_buf1,
885 sizeof(label_buf));
886 }
887 }
888 num_labels++;
889 }
890 }
891 if (num_labels) {
892 if (rtmsg->rtm_family == AF_MPLS)
893 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
894 num_labels * sizeof(mpls_lse_t));
895 else {
896 struct rtattr *nest;
897 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
898
899 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE,
900 &encap, sizeof(u_int16_t));
901 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
902 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST,
903 &out_lse,
904 num_labels * sizeof(mpls_lse_t));
905 addattr_nest_end(nlmsg, nest);
906 }
907 }
908 }
909
910 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
911 rtmsg->rtm_flags |= RTNH_F_ONLINK;
912
913 if (rtmsg->rtm_family == AF_INET
914 && (nexthop->type == NEXTHOP_TYPE_IPV6
915 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
916 rtmsg->rtm_flags |= RTNH_F_ONLINK;
917 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
918 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
919
920 if (nexthop->rmap_src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
921 addattr_l(nlmsg, req_size, RTA_PREFSRC,
922 &nexthop->rmap_src.ipv4, bytelen);
923 else if (nexthop->src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
924 addattr_l(nlmsg, req_size, RTA_PREFSRC,
925 &nexthop->src.ipv4, bytelen);
926
927 if (IS_ZEBRA_DEBUG_KERNEL)
928 zlog_debug(
929 " 5549: _netlink_route_build_singlepath() (%s): "
930 "nexthop via %s %s if %u",
931 routedesc, ipv4_ll_buf, label_buf,
932 nexthop->ifindex);
933 return;
934 }
935
936 if (nexthop->type == NEXTHOP_TYPE_IPV4
937 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
938 /* Send deletes to the kernel without specifying the next-hop */
939 if (cmd != RTM_DELROUTE)
940 _netlink_route_nl_add_gateway_info(
941 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
942 bytelen, nexthop);
943
944 if (cmd == RTM_NEWROUTE) {
945 if (nexthop->rmap_src.ipv4.s_addr)
946 addattr_l(nlmsg, req_size, RTA_PREFSRC,
947 &nexthop->rmap_src.ipv4, bytelen);
948 else if (nexthop->src.ipv4.s_addr)
949 addattr_l(nlmsg, req_size, RTA_PREFSRC,
950 &nexthop->src.ipv4, bytelen);
951 }
952
953 if (IS_ZEBRA_DEBUG_KERNEL)
954 zlog_debug(
955 "netlink_route_multipath() (%s): "
956 "nexthop via %s %s if %u",
957 routedesc, inet_ntoa(nexthop->gate.ipv4),
958 label_buf, nexthop->ifindex);
959 }
960
961 if (nexthop->type == NEXTHOP_TYPE_IPV6
962 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
963 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
964 nlmsg, req_size, bytelen,
965 nexthop);
966
967 if (cmd == RTM_NEWROUTE) {
968 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
969 addattr_l(nlmsg, req_size, RTA_PREFSRC,
970 &nexthop->rmap_src.ipv6, bytelen);
971 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
972 addattr_l(nlmsg, req_size, RTA_PREFSRC,
973 &nexthop->src.ipv6, bytelen);
974 }
975
976 if (IS_ZEBRA_DEBUG_KERNEL)
977 zlog_debug(
978 "netlink_route_multipath() (%s): "
979 "nexthop via %s %s if %u",
980 routedesc, inet6_ntoa(nexthop->gate.ipv6),
981 label_buf, nexthop->ifindex);
982 }
983 if (nexthop->type == NEXTHOP_TYPE_IFINDEX
984 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
985 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
986
987 if (cmd == RTM_NEWROUTE) {
988 if (nexthop->rmap_src.ipv4.s_addr)
989 addattr_l(nlmsg, req_size, RTA_PREFSRC,
990 &nexthop->rmap_src.ipv4, bytelen);
991 else if (nexthop->src.ipv4.s_addr)
992 addattr_l(nlmsg, req_size, RTA_PREFSRC,
993 &nexthop->src.ipv4, bytelen);
994 }
995
996 if (IS_ZEBRA_DEBUG_KERNEL)
997 zlog_debug(
998 "netlink_route_multipath() (%s): "
999 "nexthop via if %u",
1000 routedesc, nexthop->ifindex);
1001 }
1002
1003 if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1004 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1005
1006 if (cmd == RTM_NEWROUTE) {
1007 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1008 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1009 &nexthop->rmap_src.ipv6, bytelen);
1010 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1011 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1012 &nexthop->src.ipv6, bytelen);
1013 }
1014
1015 if (IS_ZEBRA_DEBUG_KERNEL)
1016 zlog_debug(
1017 "netlink_route_multipath() (%s): "
1018 "nexthop via if %u",
1019 routedesc, nexthop->ifindex);
1020 }
1021 }
1022
1023 /* This function takes a nexthop as argument and
1024 * appends to the given rtattr/rtnexthop pair the
1025 * representation of the nexthop. If the nexthop
1026 * defines a preferred source, the src parameter
1027 * will be modified to point to that src, otherwise
1028 * it will be kept unmodified.
1029 *
1030 * @param routedesc: Human readable description of route type
1031 * (direct/recursive, single-/multipath)
1032 * @param bytelen: Length of addresses in bytes.
1033 * @param nexthop: Nexthop information
1034 * @param rta: rtnetlink attribute structure
1035 * @param rtnh: pointer to an rtnetlink nexthop structure
1036 * @param src: pointer pointing to a location where
1037 * the prefsrc should be stored.
1038 */
1039 static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
1040 struct nexthop *nexthop,
1041 struct rtattr *rta,
1042 struct rtnexthop *rtnh,
1043 struct rtmsg *rtmsg,
1044 union g_addr **src)
1045 {
1046 struct nexthop_label *nh_label;
1047 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1048 char label_buf[256];
1049
1050 rtnh->rtnh_len = sizeof(*rtnh);
1051 rtnh->rtnh_flags = 0;
1052 rtnh->rtnh_hops = 0;
1053 rta->rta_len += rtnh->rtnh_len;
1054
1055 /*
1056 * label_buf is *only* currently used within debugging.
1057 * As such when we assign it we are guarding it inside
1058 * a debug test. If you want to change this make sure
1059 * you fix this assumption
1060 */
1061 label_buf[0] = '\0';
1062 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
1063 * (in the case of LER)
1064 */
1065 nh_label = nexthop->nh_label;
1066 if (rtmsg->rtm_family == AF_MPLS) {
1067 assert(nh_label);
1068 assert(nh_label->num_labels == 1);
1069 }
1070
1071 if (nh_label && nh_label->num_labels) {
1072 int i, num_labels = 0;
1073 u_int32_t bos;
1074 char label_buf1[20];
1075
1076 for (i = 0; i < nh_label->num_labels; i++) {
1077 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
1078 bos = ((i == (nh_label->num_labels - 1)) ? 1
1079 : 0);
1080 out_lse[i] = mpls_lse_encode(nh_label->label[i],
1081 0, 0, bos);
1082 if (IS_ZEBRA_DEBUG_KERNEL) {
1083 if (!num_labels)
1084 sprintf(label_buf, "label %u",
1085 nh_label->label[i]);
1086 else {
1087 sprintf(label_buf1, "/%u",
1088 nh_label->label[i]);
1089 strlcat(label_buf, label_buf1,
1090 sizeof(label_buf));
1091 }
1092 }
1093 num_labels++;
1094 }
1095 }
1096 if (num_labels) {
1097 if (rtmsg->rtm_family == AF_MPLS) {
1098 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1099 &out_lse,
1100 num_labels * sizeof(mpls_lse_t));
1101 rtnh->rtnh_len += RTA_LENGTH(
1102 num_labels * sizeof(mpls_lse_t));
1103 } else {
1104 struct rtattr *nest;
1105 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
1106 int len = rta->rta_len;
1107
1108 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1109 RTA_ENCAP_TYPE, &encap,
1110 sizeof(u_int16_t));
1111 nest = rta_nest(rta, NL_PKT_BUF_SIZE,
1112 RTA_ENCAP);
1113 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1114 MPLS_IPTUNNEL_DST, &out_lse,
1115 num_labels * sizeof(mpls_lse_t));
1116 rta_nest_end(rta, nest);
1117 rtnh->rtnh_len += rta->rta_len - len;
1118 }
1119 }
1120 }
1121
1122 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1123 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1124
1125 if (rtmsg->rtm_family == AF_INET
1126 && (nexthop->type == NEXTHOP_TYPE_IPV6
1127 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1128 bytelen = 4;
1129 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1130 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1131 bytelen);
1132 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1133 rtnh->rtnh_ifindex = nexthop->ifindex;
1134
1135 if (nexthop->rmap_src.ipv4.s_addr)
1136 *src = &nexthop->rmap_src;
1137 else if (nexthop->src.ipv4.s_addr)
1138 *src = &nexthop->src;
1139
1140 if (IS_ZEBRA_DEBUG_KERNEL)
1141 zlog_debug(
1142 " 5549: netlink_route_build_multipath() (%s): "
1143 "nexthop via %s %s if %u",
1144 routedesc, ipv4_ll_buf, label_buf,
1145 nexthop->ifindex);
1146 return;
1147 }
1148
1149 if (nexthop->type == NEXTHOP_TYPE_IPV4
1150 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1151 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1152 rta, rtnh, NL_PKT_BUF_SIZE,
1153 bytelen, nexthop);
1154 if (nexthop->rmap_src.ipv4.s_addr)
1155 *src = &nexthop->rmap_src;
1156 else if (nexthop->src.ipv4.s_addr)
1157 *src = &nexthop->src;
1158
1159 if (IS_ZEBRA_DEBUG_KERNEL)
1160 zlog_debug(
1161 "netlink_route_multipath() (%s): "
1162 "nexthop via %s %s if %u",
1163 routedesc, inet_ntoa(nexthop->gate.ipv4),
1164 label_buf, nexthop->ifindex);
1165 }
1166 if (nexthop->type == NEXTHOP_TYPE_IPV6
1167 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1168 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1169 rta, rtnh, NL_PKT_BUF_SIZE,
1170 bytelen, nexthop);
1171
1172 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1173 *src = &nexthop->rmap_src;
1174 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1175 *src = &nexthop->src;
1176
1177 if (IS_ZEBRA_DEBUG_KERNEL)
1178 zlog_debug(
1179 "netlink_route_multipath() (%s): "
1180 "nexthop via %s %s if %u",
1181 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1182 label_buf, nexthop->ifindex);
1183 }
1184 /* ifindex */
1185 if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
1186 || nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1187 rtnh->rtnh_ifindex = nexthop->ifindex;
1188
1189 if (nexthop->rmap_src.ipv4.s_addr)
1190 *src = &nexthop->rmap_src;
1191 else if (nexthop->src.ipv4.s_addr)
1192 *src = &nexthop->src;
1193
1194 if (IS_ZEBRA_DEBUG_KERNEL)
1195 zlog_debug(
1196 "netlink_route_multipath() (%s): "
1197 "nexthop via if %u",
1198 routedesc, nexthop->ifindex);
1199 } else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1200 rtnh->rtnh_ifindex = nexthop->ifindex;
1201
1202 if (IS_ZEBRA_DEBUG_KERNEL)
1203 zlog_debug(
1204 "netlink_route_multipath() (%s): "
1205 "nexthop via if %u",
1206 routedesc, nexthop->ifindex);
1207 } else {
1208 rtnh->rtnh_ifindex = 0;
1209 }
1210 }
1211
1212 static inline void _netlink_mpls_build_singlepath(const char *routedesc,
1213 zebra_nhlfe_t *nhlfe,
1214 struct nlmsghdr *nlmsg,
1215 struct rtmsg *rtmsg,
1216 size_t req_size, int cmd)
1217 {
1218 int bytelen;
1219 u_char family;
1220
1221 family = NHLFE_FAMILY(nhlfe);
1222 bytelen = (family == AF_INET ? 4 : 16);
1223 _netlink_route_build_singlepath(routedesc, bytelen, nhlfe->nexthop,
1224 nlmsg, rtmsg, req_size, cmd);
1225 }
1226
1227
1228 static inline void
1229 _netlink_mpls_build_multipath(const char *routedesc, zebra_nhlfe_t *nhlfe,
1230 struct rtattr *rta, struct rtnexthop *rtnh,
1231 struct rtmsg *rtmsg, union g_addr **src)
1232 {
1233 int bytelen;
1234 u_char family;
1235
1236 family = NHLFE_FAMILY(nhlfe);
1237 bytelen = (family == AF_INET ? 4 : 16);
1238 _netlink_route_build_multipath(routedesc, bytelen, nhlfe->nexthop, rta,
1239 rtnh, rtmsg, src);
1240 }
1241
1242
1243 /* Log debug information for netlink_route_multipath
1244 * if debug logging is enabled.
1245 *
1246 * @param cmd: Netlink command which is to be processed
1247 * @param p: Prefix for which the change is due
1248 * @param nexthop: Nexthop which is currently processed
1249 * @param routedesc: Semantic annotation for nexthop
1250 * (recursive, multipath, etc.)
1251 * @param family: Address family which the change concerns
1252 */
1253 static void _netlink_route_debug(int cmd, struct prefix *p,
1254 struct nexthop *nexthop, const char *routedesc,
1255 int family, struct zebra_vrf *zvrf)
1256 {
1257 if (IS_ZEBRA_DEBUG_KERNEL) {
1258 char buf[PREFIX_STRLEN];
1259 zlog_debug(
1260 "netlink_route_multipath() (%s): %s %s vrf %u type %s",
1261 routedesc, nl_msg_type_to_str(cmd),
1262 prefix2str(p, buf, sizeof(buf)), zvrf_id(zvrf),
1263 (nexthop) ? nexthop_type_to_str(nexthop->type) : "UNK");
1264 }
1265 }
1266
1267 static void _netlink_mpls_debug(int cmd, u_int32_t label, const char *routedesc)
1268 {
1269 if (IS_ZEBRA_DEBUG_KERNEL)
1270 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1271 nl_msg_type_to_str(cmd), label);
1272 }
1273
1274 static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
1275 int llalen)
1276 {
1277 struct {
1278 struct nlmsghdr n;
1279 struct ndmsg ndm;
1280 char buf[256];
1281 } req;
1282
1283 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1284
1285 memset(&req.n, 0, sizeof(req.n));
1286 memset(&req.ndm, 0, sizeof(req.ndm));
1287
1288 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1289 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1290 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1291 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1292
1293 req.ndm.ndm_family = AF_INET;
1294 req.ndm.ndm_state = NUD_PERMANENT;
1295 req.ndm.ndm_ifindex = ifindex;
1296 req.ndm.ndm_type = RTN_UNICAST;
1297
1298 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1299 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1300
1301 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1302 0);
1303 }
1304
1305 /* Routing table change via netlink interface. */
1306 /* Update flag indicates whether this is a "replace" or not. */
1307 static int netlink_route_multipath(int cmd, struct prefix *p,
1308 struct prefix *src_p, struct route_entry *re,
1309 int update)
1310 {
1311 int bytelen;
1312 struct sockaddr_nl snl;
1313 struct nexthop *nexthop = NULL;
1314 unsigned int nexthop_num;
1315 int discard = 0;
1316 int family = PREFIX_FAMILY(p);
1317 const char *routedesc;
1318 int setsrc = 0;
1319 union g_addr src;
1320
1321 struct {
1322 struct nlmsghdr n;
1323 struct rtmsg r;
1324 char buf[NL_PKT_BUF_SIZE];
1325 } req;
1326
1327 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1328 struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
1329
1330 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
1331
1332 bytelen = (family == AF_INET ? 4 : 16);
1333
1334 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1335 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1336 if ((cmd == RTM_NEWROUTE) && update)
1337 req.n.nlmsg_flags |= NLM_F_REPLACE;
1338 req.n.nlmsg_type = cmd;
1339 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1340
1341 req.r.rtm_family = family;
1342 req.r.rtm_dst_len = p->prefixlen;
1343 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1344 req.r.rtm_protocol = zebra2proto(re->type);
1345 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1346 req.r.rtm_type = RTN_UNICAST;
1347
1348 addattr_l(&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
1349 if (src_p)
1350 addattr_l(&req.n, sizeof req, RTA_SRC, &src_p->u.prefix,
1351 bytelen);
1352
1353 /* Metric. */
1354 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1355 * used
1356 * either by the kernel or by zebra. Its purely for calculating best
1357 * path(s)
1358 * by the routing protocol and for communicating with protocol peers.
1359 */
1360 addattr32(&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
1361
1362 /* Table corresponding to this route. */
1363 if (re->table < 256)
1364 req.r.rtm_table = re->table;
1365 else {
1366 req.r.rtm_table = RT_TABLE_UNSPEC;
1367 addattr32(&req.n, sizeof req, RTA_TABLE, re->table);
1368 }
1369
1370 if (discard)
1371 goto skip;
1372
1373 if (re->mtu || re->nexthop_mtu) {
1374 char buf[NL_PKT_BUF_SIZE];
1375 struct rtattr *rta = (void *)buf;
1376 u_int32_t mtu = re->mtu;
1377 if (!mtu || (re->nexthop_mtu && re->nexthop_mtu < mtu))
1378 mtu = re->nexthop_mtu;
1379 rta->rta_type = RTA_METRICS;
1380 rta->rta_len = RTA_LENGTH(0);
1381 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
1382 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1383 RTA_PAYLOAD(rta));
1384 }
1385
1386 /* Count overall nexthops so we can decide whether to use singlepath
1387 * or multipath case. */
1388 nexthop_num = 0;
1389 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1390 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1391 continue;
1392 if (cmd == RTM_NEWROUTE
1393 && !NEXTHOP_IS_ACTIVE(nexthop->flags))
1394 continue;
1395 if (cmd == RTM_DELROUTE
1396 && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
1397 continue;
1398
1399 nexthop_num++;
1400 }
1401
1402 /* Singlepath case. */
1403 if (nexthop_num == 1 || multipath_num == 1) {
1404 nexthop_num = 0;
1405 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1406 /*
1407 * So we want to cover 2 types of blackhole
1408 * routes here:
1409 * 1) A normal blackhole route( ala from a static
1410 * install.
1411 * 2) A recursively resolved blackhole route
1412 */
1413 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1414 switch (nexthop->bh_type) {
1415 case BLACKHOLE_ADMINPROHIB:
1416 req.r.rtm_type = RTN_PROHIBIT;
1417 break;
1418 case BLACKHOLE_REJECT:
1419 req.r.rtm_type = RTN_UNREACHABLE;
1420 break;
1421 default:
1422 req.r.rtm_type = RTN_BLACKHOLE;
1423 break;
1424 }
1425 goto skip;
1426 }
1427 if (CHECK_FLAG(nexthop->flags,
1428 NEXTHOP_FLAG_RECURSIVE)) {
1429 if (!setsrc) {
1430 if (family == AF_INET) {
1431 if (nexthop->rmap_src.ipv4
1432 .s_addr
1433 != 0) {
1434 src.ipv4 =
1435 nexthop->rmap_src
1436 .ipv4;
1437 setsrc = 1;
1438 } else if (nexthop->src.ipv4
1439 .s_addr
1440 != 0) {
1441 src.ipv4 =
1442 nexthop->src
1443 .ipv4;
1444 setsrc = 1;
1445 }
1446 } else if (family == AF_INET6) {
1447 if (!IN6_IS_ADDR_UNSPECIFIED(
1448 &nexthop->rmap_src
1449 .ipv6)) {
1450 src.ipv6 =
1451 nexthop->rmap_src
1452 .ipv6;
1453 setsrc = 1;
1454 } else if (
1455 !IN6_IS_ADDR_UNSPECIFIED(
1456 &nexthop->src
1457 .ipv6)) {
1458 src.ipv6 =
1459 nexthop->src
1460 .ipv6;
1461 setsrc = 1;
1462 }
1463 }
1464 }
1465 continue;
1466 }
1467
1468 if ((cmd == RTM_NEWROUTE
1469 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1470 || (cmd == RTM_DELROUTE
1471 && CHECK_FLAG(nexthop->flags,
1472 NEXTHOP_FLAG_FIB))) {
1473 routedesc = nexthop->rparent
1474 ? "recursive, single-path"
1475 : "single-path";
1476
1477 _netlink_route_debug(cmd, p, nexthop, routedesc,
1478 family, zvrf);
1479 _netlink_route_build_singlepath(
1480 routedesc, bytelen, nexthop, &req.n,
1481 &req.r, sizeof req, cmd);
1482 nexthop_num++;
1483 break;
1484 }
1485 }
1486 if (setsrc && (cmd == RTM_NEWROUTE)) {
1487 if (family == AF_INET)
1488 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1489 &src.ipv4, bytelen);
1490 else if (family == AF_INET6)
1491 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1492 &src.ipv6, bytelen);
1493 }
1494 } else {
1495 char buf[NL_PKT_BUF_SIZE];
1496 struct rtattr *rta = (void *)buf;
1497 struct rtnexthop *rtnh;
1498 union g_addr *src1 = NULL;
1499
1500 rta->rta_type = RTA_MULTIPATH;
1501 rta->rta_len = RTA_LENGTH(0);
1502 rtnh = RTA_DATA(rta);
1503
1504 nexthop_num = 0;
1505 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1506 if (nexthop_num >= multipath_num)
1507 break;
1508
1509 if (CHECK_FLAG(nexthop->flags,
1510 NEXTHOP_FLAG_RECURSIVE)) {
1511 /* This only works for IPv4 now */
1512 if (!setsrc) {
1513 if (family == AF_INET) {
1514 if (nexthop->rmap_src.ipv4
1515 .s_addr
1516 != 0) {
1517 src.ipv4 =
1518 nexthop->rmap_src
1519 .ipv4;
1520 setsrc = 1;
1521 } else if (nexthop->src.ipv4
1522 .s_addr
1523 != 0) {
1524 src.ipv4 =
1525 nexthop->src
1526 .ipv4;
1527 setsrc = 1;
1528 }
1529 } else if (family == AF_INET6) {
1530 if (!IN6_IS_ADDR_UNSPECIFIED(
1531 &nexthop->rmap_src
1532 .ipv6)) {
1533 src.ipv6 =
1534 nexthop->rmap_src
1535 .ipv6;
1536 setsrc = 1;
1537 } else if (
1538 !IN6_IS_ADDR_UNSPECIFIED(
1539 &nexthop->src
1540 .ipv6)) {
1541 src.ipv6 =
1542 nexthop->src
1543 .ipv6;
1544 setsrc = 1;
1545 }
1546 }
1547 }
1548 continue;
1549 }
1550
1551 if ((cmd == RTM_NEWROUTE
1552 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1553 || (cmd == RTM_DELROUTE
1554 && CHECK_FLAG(nexthop->flags,
1555 NEXTHOP_FLAG_FIB))) {
1556 routedesc = nexthop->rparent
1557 ? "recursive, multipath"
1558 : "multipath";
1559 nexthop_num++;
1560
1561 _netlink_route_debug(cmd, p, nexthop, routedesc,
1562 family, zvrf);
1563 _netlink_route_build_multipath(
1564 routedesc, bytelen, nexthop, rta, rtnh,
1565 &req.r, &src1);
1566 rtnh = RTNH_NEXT(rtnh);
1567
1568 if (!setsrc && src1) {
1569 if (family == AF_INET)
1570 src.ipv4 = src1->ipv4;
1571 else if (family == AF_INET6)
1572 src.ipv6 = src1->ipv6;
1573
1574 setsrc = 1;
1575 }
1576 }
1577 }
1578 if (setsrc && (cmd == RTM_NEWROUTE)) {
1579 if (family == AF_INET)
1580 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1581 &src.ipv4, bytelen);
1582 else if (family == AF_INET6)
1583 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1584 &src.ipv6, bytelen);
1585 if (IS_ZEBRA_DEBUG_KERNEL)
1586 zlog_debug("Setting source");
1587 }
1588
1589 if (rta->rta_len > RTA_LENGTH(0))
1590 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1591 RTA_DATA(rta), RTA_PAYLOAD(rta));
1592 }
1593
1594 /* If there is no useful nexthop then return. */
1595 if (nexthop_num == 0) {
1596 if (IS_ZEBRA_DEBUG_KERNEL)
1597 zlog_debug(
1598 "netlink_route_multipath(): No useful nexthop.");
1599 return 0;
1600 }
1601
1602 skip:
1603
1604 /* Destination netlink address. */
1605 memset(&snl, 0, sizeof snl);
1606 snl.nl_family = AF_NETLINK;
1607
1608 /* Talk to netlink socket. */
1609 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1610 0);
1611 }
1612
1613 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
1614 {
1615 int suc = 0;
1616 struct mcast_route_data *mr = (struct mcast_route_data *)in;
1617 struct {
1618 struct nlmsghdr n;
1619 struct ndmsg ndm;
1620 char buf[256];
1621 } req;
1622
1623 mroute = mr;
1624 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1625
1626 memset(&req.n, 0, sizeof(req.n));
1627 memset(&req.ndm, 0, sizeof(req.ndm));
1628
1629 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1630 req.n.nlmsg_flags = NLM_F_REQUEST;
1631 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1632
1633 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1634 req.n.nlmsg_type = RTM_GETROUTE;
1635
1636 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1637 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1638 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1639 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
1640 addattr_l(&req.n, sizeof(req), RTA_TABLE, &zvrf->table_id, 4);
1641
1642 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1643 &zns->netlink_cmd, zns, 0);
1644
1645 mroute = NULL;
1646 return suc;
1647 }
1648
1649 int kernel_route_rib(struct prefix *p, struct prefix *src_p,
1650 struct route_entry *old, struct route_entry *new)
1651 {
1652 assert(old || new);
1653
1654 if (!old && new)
1655 return netlink_route_multipath(RTM_NEWROUTE, p, src_p, new, 0);
1656 if (old && !new)
1657 return netlink_route_multipath(RTM_DELROUTE, p, src_p, old, 0);
1658
1659 return netlink_route_multipath(RTM_NEWROUTE, p, src_p, new, 1);
1660 }
1661
1662 int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
1663 int llalen)
1664 {
1665 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
1666 addr, lla, llalen);
1667 }
1668
1669 /*
1670 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
1671 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
1672 */
1673 static int netlink_vxlan_flood_list_update(struct interface *ifp,
1674 struct in_addr *vtep_ip, int cmd)
1675 {
1676 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1677 struct {
1678 struct nlmsghdr n;
1679 struct ndmsg ndm;
1680 char buf[256];
1681 } req;
1682 u_char dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1683
1684 memset(&req.n, 0, sizeof(req.n));
1685 memset(&req.ndm, 0, sizeof(req.ndm));
1686
1687 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1688 req.n.nlmsg_flags = NLM_F_REQUEST;
1689 if (cmd == RTM_NEWNEIGH)
1690 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
1691 req.n.nlmsg_type = cmd;
1692 req.ndm.ndm_family = PF_BRIDGE;
1693 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
1694 req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
1695
1696
1697 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
1698 req.ndm.ndm_ifindex = ifp->ifindex;
1699 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip->s_addr, 4);
1700
1701 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1702 0);
1703 }
1704
1705 /*
1706 * Add remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1707 * adding
1708 * a "flood" MAC FDB entry.
1709 */
1710 int kernel_add_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1711 {
1712 if (IS_ZEBRA_DEBUG_VXLAN)
1713 zlog_debug("Install %s into flood list for VNI %u intf %s(%u)",
1714 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1715
1716 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_NEWNEIGH);
1717 }
1718
1719 /*
1720 * Remove remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1721 * deleting the "flood" MAC FDB entry.
1722 */
1723 int kernel_del_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1724 {
1725 if (IS_ZEBRA_DEBUG_VXLAN)
1726 zlog_debug(
1727 "Uninstall %s from flood list for VNI %u intf %s(%u)",
1728 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1729
1730 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_DELNEIGH);
1731 }
1732
/*
 * Fallback definition, used only when the included headers do not provide
 * NDA_RTA: yields the first rtattr that follows the (aligned) struct ndmsg
 * at address r.
 */
#if !defined(NDA_RTA)
#define NDA_RTA(r)                                                             \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif /* NDA_RTA */
1737
1738 static int netlink_macfdb_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
1739 int len)
1740 {
1741 struct ndmsg *ndm;
1742 struct interface *ifp;
1743 struct zebra_if *zif;
1744 struct rtattr *tb[NDA_MAX + 1];
1745 struct interface *br_if;
1746 struct ethaddr mac;
1747 vlanid_t vid = 0;
1748 struct prefix vtep_ip;
1749 int vid_present = 0, dst_present = 0;
1750 char buf[ETHER_ADDR_STRLEN];
1751 char vid_buf[20];
1752 char dst_buf[30];
1753 u_char sticky = 0;
1754
1755 ndm = NLMSG_DATA(h);
1756
1757 /* We only process macfdb notifications if EVPN is enabled */
1758 if (!is_evpn_enabled())
1759 return 0;
1760
1761 /* The interface should exist. */
1762 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
1763 ndm->ndm_ifindex);
1764 if (!ifp || !ifp->info)
1765 return 0;
1766
1767 /* The interface should be something we're interested in. */
1768 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
1769 return 0;
1770
1771 /* Drop "permanent" entries. */
1772 if (ndm->ndm_state & NUD_PERMANENT)
1773 return 0;
1774
1775 zif = (struct zebra_if *)ifp->info;
1776 if ((br_if = zif->brslave_info.br_if) == NULL) {
1777 zlog_warn("%s family %s IF %s(%u) brIF %u - no bridge master",
1778 nl_msg_type_to_str(h->nlmsg_type),
1779 nl_family_to_str(ndm->ndm_family), ifp->name,
1780 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1781 return 0;
1782 }
1783
1784 /* Parse attributes and extract fields of interest. */
1785 memset(tb, 0, sizeof tb);
1786 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
1787
1788 if (!tb[NDA_LLADDR]) {
1789 zlog_warn("%s family %s IF %s(%u) brIF %u - no LLADDR",
1790 nl_msg_type_to_str(h->nlmsg_type),
1791 nl_family_to_str(ndm->ndm_family), ifp->name,
1792 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1793 return 0;
1794 }
1795
1796 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
1797 zlog_warn(
1798 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
1799 nl_msg_type_to_str(h->nlmsg_type),
1800 nl_family_to_str(ndm->ndm_family), ifp->name,
1801 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex,
1802 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
1803 return 0;
1804 }
1805
1806 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
1807
1808 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
1809 vid_present = 1;
1810 vid = *(u_int16_t *)RTA_DATA(tb[NDA_VLAN]);
1811 sprintf(vid_buf, " VLAN %u", vid);
1812 }
1813
1814 if (tb[NDA_DST]) {
1815 /* TODO: Only IPv4 supported now. */
1816 dst_present = 1;
1817 vtep_ip.family = AF_INET;
1818 vtep_ip.prefixlen = IPV4_MAX_BITLEN;
1819 memcpy(&(vtep_ip.u.prefix4.s_addr), RTA_DATA(tb[NDA_DST]),
1820 IPV4_MAX_BYTELEN);
1821 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip.u.prefix4));
1822 }
1823
1824 sticky = (ndm->ndm_state & NUD_NOARP) ? 1 : 0;
1825
1826 if (IS_ZEBRA_DEBUG_KERNEL)
1827 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
1828 nl_msg_type_to_str(h->nlmsg_type),
1829 nl_family_to_str(ndm->ndm_family), ifp->name,
1830 ndm->ndm_ifindex, vid_present ? vid_buf : "",
1831 sticky ? "sticky " : "",
1832 prefix_mac2str(&mac, buf, sizeof(buf)),
1833 dst_present ? dst_buf : "");
1834
1835 if (filter_vlan && vid != filter_vlan)
1836 return 0;
1837
1838 /* If add or update, do accordingly if learnt on a "local" interface; if
1839 * the notification is over VxLAN, this has to be related to
1840 * multi-homing,
1841 * so perform an implicit delete of any local entry (if it exists).
1842 */
1843 if (h->nlmsg_type == RTM_NEWNEIGH) {
1844 /* Drop "permanent" entries. */
1845 if (ndm->ndm_state & NUD_PERMANENT)
1846 return 0;
1847
1848 if (IS_ZEBRA_IF_VXLAN(ifp))
1849 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
1850 vid);
1851
1852 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
1853 sticky);
1854 }
1855
1856 /* This is a delete notification.
1857 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
1858 * 2. For a MAC over "local" interface, delete the mac
1859 * Note: We will get notifications from both bridge driver and VxLAN
1860 * driver.
1861 * Ignore the notification from VxLan driver as it is also generated
1862 * when mac moves from remote to local.
1863 */
1864 if (dst_present)
1865 return 0;
1866
1867 if (IS_ZEBRA_IF_VXLAN(ifp))
1868 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
1869 vid);
1870
1871 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
1872 }
1873
1874 static int netlink_macfdb_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
1875 ns_id_t ns_id, int startup)
1876 {
1877 int len;
1878 struct ndmsg *ndm;
1879
1880 if (h->nlmsg_type != RTM_NEWNEIGH)
1881 return 0;
1882
1883 /* Length validity. */
1884 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
1885 if (len < 0)
1886 return -1;
1887
1888 /* We are interested only in AF_BRIDGE notifications. */
1889 ndm = NLMSG_DATA(h);
1890 if (ndm->ndm_family != AF_BRIDGE)
1891 return 0;
1892
1893 return netlink_macfdb_change(snl, h, len);
1894 }
1895
1896 /* Request for MAC FDB information from the kernel */
1897 static int netlink_request_macs(struct zebra_ns *zns, int family, int type,
1898 ifindex_t master_ifindex)
1899 {
1900 struct {
1901 struct nlmsghdr n;
1902 struct ifinfomsg ifm;
1903 char buf[256];
1904 } req;
1905
1906 /* Form the request, specifying filter (rtattr) if needed. */
1907 memset(&req, 0, sizeof(req));
1908 req.n.nlmsg_type = type;
1909 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1910 req.ifm.ifi_family = family;
1911 if (master_ifindex)
1912 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
1913
1914 return netlink_request(&zns->netlink_cmd, &req.n);
1915 }
1916
1917 /*
1918 * MAC forwarding database read using netlink interface. This is invoked
1919 * at startup.
1920 */
1921 int netlink_macfdb_read(struct zebra_ns *zns)
1922 {
1923 int ret;
1924
1925 /* Get bridge FDB table. */
1926 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH, 0);
1927 if (ret < 0)
1928 return ret;
1929 /* We are reading entire table. */
1930 filter_vlan = 0;
1931 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1932 0, 1);
1933
1934 return ret;
1935 }
1936
1937 /*
1938 * MAC forwarding database read using netlink interface. This is for a
1939 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
1940 */
1941 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
1942 struct interface *br_if)
1943 {
1944 struct zebra_if *br_zif;
1945 struct zebra_if *zif;
1946 struct zebra_l2info_vxlan *vxl;
1947 int ret = 0;
1948
1949
1950 /* Save VLAN we're filtering on, if needed. */
1951 br_zif = (struct zebra_if *)br_if->info;
1952 zif = (struct zebra_if *)ifp->info;
1953 vxl = &zif->l2info.vxl;
1954 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
1955 filter_vlan = vxl->access_vlan;
1956
1957 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
1958 */
1959 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH,
1960 br_if->ifindex);
1961 if (ret < 0)
1962 return ret;
1963 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1964 0, 0);
1965
1966 /* Reset VLAN filter. */
1967 filter_vlan = 0;
1968 return ret;
1969 }
1970
1971 static int netlink_macfdb_update(struct interface *ifp, vlanid_t vid,
1972 struct ethaddr *mac, struct in_addr vtep_ip,
1973 int local, int cmd, u_char sticky)
1974 {
1975 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1976 struct {
1977 struct nlmsghdr n;
1978 struct ndmsg ndm;
1979 char buf[256];
1980 } req;
1981 int dst_alen;
1982 struct zebra_if *zif;
1983 struct interface *br_if;
1984 struct zebra_if *br_zif;
1985 char buf[ETHER_ADDR_STRLEN];
1986 int vid_present = 0, dst_present = 0;
1987 char vid_buf[20];
1988 char dst_buf[30];
1989
1990 zif = ifp->info;
1991 if ((br_if = zif->brslave_info.br_if) == NULL) {
1992 zlog_warn("MAC %s on IF %s(%u) - no mapping to bridge",
1993 (cmd == RTM_NEWNEIGH) ? "add" : "del", ifp->name,
1994 ifp->ifindex);
1995 return -1;
1996 }
1997
1998 memset(&req.n, 0, sizeof(req.n));
1999 memset(&req.ndm, 0, sizeof(req.ndm));
2000
2001 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2002 req.n.nlmsg_flags = NLM_F_REQUEST;
2003 if (cmd == RTM_NEWNEIGH)
2004 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2005 req.n.nlmsg_type = cmd;
2006 req.ndm.ndm_family = AF_BRIDGE;
2007 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2008 req.ndm.ndm_state = NUD_REACHABLE;
2009
2010 if (sticky)
2011 req.ndm.ndm_state |= NUD_NOARP;
2012 else
2013 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2014
2015 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2016 req.ndm.ndm_ifindex = ifp->ifindex;
2017 if (!local) {
2018 dst_alen = 4; // TODO: hardcoded
2019 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
2020 dst_present = 1;
2021 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip));
2022 }
2023 br_zif = (struct zebra_if *)br_if->info;
2024 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) {
2025 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2026 vid_present = 1;
2027 sprintf(vid_buf, " VLAN %u", vid);
2028 }
2029 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2030
2031 if (IS_ZEBRA_DEBUG_KERNEL)
2032 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2033 nl_msg_type_to_str(cmd),
2034 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2035 ifp->ifindex, vid_present ? vid_buf : "",
2036 sticky ? "sticky " : "",
2037 prefix_mac2str(mac, buf, sizeof(buf)),
2038 dst_present ? dst_buf : "");
2039
2040 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2041 0);
2042 }
2043
/*
 * Neighbor states that netlink_ipneigh_change() treats as "valid for use":
 * an RTM_NEWNEIGH in any of these states is processed as an add/update,
 * any other state as a delete.
 */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE     \
	 | NUD_DELAY)
2047
2048 static int netlink_ipneigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2049 int len)
2050 {
2051 struct ndmsg *ndm;
2052 struct interface *ifp;
2053 struct zebra_if *zif;
2054 struct rtattr *tb[NDA_MAX + 1];
2055 struct interface *link_if;
2056 struct ethaddr mac;
2057 struct ipaddr ip;
2058 char buf[ETHER_ADDR_STRLEN];
2059 char buf2[INET6_ADDRSTRLEN];
2060 int mac_present = 0;
2061 u_char ext_learned;
2062
2063 ndm = NLMSG_DATA(h);
2064
2065 /* We only process neigh notifications if EVPN is enabled */
2066 if (!is_evpn_enabled())
2067 return 0;
2068
2069 /* The interface should exist. */
2070 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2071 ndm->ndm_ifindex);
2072 if (!ifp || !ifp->info)
2073 return 0;
2074
2075 /* Drop "permanent" entries. */
2076 if (ndm->ndm_state & NUD_PERMANENT)
2077 return 0;
2078
2079 zif = (struct zebra_if *)ifp->info;
2080 /* The neighbor is present on an SVI. From this, we locate the
2081 * underlying
2082 * bridge because we're only interested in neighbors on a VxLAN bridge.
2083 * The bridge is located based on the nature of the SVI:
2084 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2085 * interface
2086 * and is linked to the bridge
2087 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2088 * inteface
2089 * itself
2090 */
2091 if (IS_ZEBRA_IF_VLAN(ifp)) {
2092 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2093 zif->link_ifindex);
2094 if (!link_if)
2095 return 0;
2096 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
2097 link_if = ifp;
2098 else
2099 return 0;
2100
2101 /* Parse attributes and extract fields of interest. */
2102 memset(tb, 0, sizeof tb);
2103 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2104
2105 if (!tb[NDA_DST]) {
2106 zlog_warn("%s family %s IF %s(%u) - no DST",
2107 nl_msg_type_to_str(h->nlmsg_type),
2108 nl_family_to_str(ndm->ndm_family), ifp->name,
2109 ndm->ndm_ifindex);
2110 return 0;
2111 }
2112 memset(&mac, 0, sizeof(struct ethaddr));
2113 memset(&ip, 0, sizeof(struct ipaddr));
2114 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
2115 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
2116
2117 if (h->nlmsg_type == RTM_NEWNEIGH) {
2118 if (tb[NDA_LLADDR]) {
2119 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
2120 zlog_warn(
2121 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
2122 nl_msg_type_to_str(h->nlmsg_type),
2123 nl_family_to_str(ndm->ndm_family),
2124 ifp->name, ndm->ndm_ifindex,
2125 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
2126 return 0;
2127 }
2128
2129 mac_present = 1;
2130 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
2131 }
2132
2133 ext_learned = (ndm->ndm_flags & NTF_EXT_LEARNED) ? 1 : 0;
2134
2135 if (IS_ZEBRA_DEBUG_KERNEL)
2136 zlog_debug(
2137 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2138 nl_msg_type_to_str(h->nlmsg_type),
2139 nl_family_to_str(ndm->ndm_family), ifp->name,
2140 ndm->ndm_ifindex,
2141 ipaddr2str(&ip, buf2, sizeof(buf2)),
2142 mac_present
2143 ? prefix_mac2str(&mac, buf, sizeof(buf))
2144 : "",
2145 ndm->ndm_state, ndm->ndm_flags);
2146
2147 /* If the neighbor state is valid for use, process as an add or
2148 * update
2149 * else process as a delete. Note that the delete handling may
2150 * result
2151 * in re-adding the neighbor if it is a valid "remote" neighbor.
2152 */
2153 if (ndm->ndm_state & NUD_VALID)
2154 return zebra_vxlan_local_neigh_add_update(
2155 ifp, link_if, &ip, &mac, ndm->ndm_state,
2156 ext_learned);
2157
2158 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2159 }
2160
2161 if (IS_ZEBRA_DEBUG_KERNEL)
2162 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2163 nl_msg_type_to_str(h->nlmsg_type),
2164 nl_family_to_str(ndm->ndm_family), ifp->name,
2165 ndm->ndm_ifindex,
2166 ipaddr2str(&ip, buf2, sizeof(buf2)));
2167
2168 /* Process the delete - it may result in re-adding the neighbor if it is
2169 * a valid "remote" neighbor.
2170 */
2171 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2172 }
2173
2174 static int netlink_neigh_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
2175 ns_id_t ns_id, int startup)
2176 {
2177 int len;
2178 struct ndmsg *ndm;
2179
2180 if (h->nlmsg_type != RTM_NEWNEIGH)
2181 return 0;
2182
2183 /* Length validity. */
2184 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2185 if (len < 0)
2186 return -1;
2187
2188 /* We are interested only in AF_INET or AF_INET6 notifications. */
2189 ndm = NLMSG_DATA(h);
2190 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
2191 return 0;
2192
2193 return netlink_neigh_change(snl, h, len);
2194 }
2195
2196 /* Request for IP neighbor information from the kernel */
2197 static int netlink_request_neigh(struct zebra_ns *zns, int family, int type,
2198 ifindex_t ifindex)
2199 {
2200 struct {
2201 struct nlmsghdr n;
2202 struct ndmsg ndm;
2203 char buf[256];
2204 } req;
2205
2206 /* Form the request, specifying filter (rtattr) if needed. */
2207 memset(&req, 0, sizeof(req));
2208 req.n.nlmsg_type = type;
2209 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2210 req.ndm.ndm_family = family;
2211 if (ifindex)
2212 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
2213
2214 return netlink_request(&zns->netlink_cmd, &req.n);
2215 }
2216
2217 /*
2218 * IP Neighbor table read using netlink interface. This is invoked
2219 * at startup.
2220 */
2221 int netlink_neigh_read(struct zebra_ns *zns)
2222 {
2223 int ret;
2224
2225 /* Get IP neighbor table. */
2226 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH, 0);
2227 if (ret < 0)
2228 return ret;
2229 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2230 1);
2231
2232 return ret;
2233 }
2234
2235 /*
2236 * IP Neighbor table read using netlink interface. This is for a specific
2237 * VLAN device.
2238 */
2239 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2240 {
2241 int ret = 0;
2242
2243 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH,
2244 vlan_if->ifindex);
2245 if (ret < 0)
2246 return ret;
2247 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2248 0);
2249
2250 return ret;
2251 }
2252
2253 int netlink_neigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2254 ns_id_t ns_id)
2255 {
2256 int len;
2257 struct ndmsg *ndm;
2258
2259 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
2260 return 0;
2261
2262 /* Length validity. */
2263 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2264 if (len < 0)
2265 return -1;
2266
2267 /* Is this a notification for the MAC FDB or IP neighbor table? */
2268 ndm = NLMSG_DATA(h);
2269 if (ndm->ndm_family == AF_BRIDGE)
2270 return netlink_macfdb_change(snl, h, len);
2271
2272 if (ndm->ndm_type != RTN_UNICAST)
2273 return 0;
2274
2275 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2276 return netlink_ipneigh_change(snl, h, len);
2277
2278 return 0;
2279 }
2280
2281 static int netlink_neigh_update2(struct interface *ifp, struct ipaddr *ip,
2282 struct ethaddr *mac, u_int32_t flags, int cmd)
2283 {
2284 struct {
2285 struct nlmsghdr n;
2286 struct ndmsg ndm;
2287 char buf[256];
2288 } req;
2289 int ipa_len;
2290
2291 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2292 char buf[INET6_ADDRSTRLEN];
2293 char buf2[ETHER_ADDR_STRLEN];
2294
2295 memset(&req.n, 0, sizeof(req.n));
2296 memset(&req.ndm, 0, sizeof(req.ndm));
2297
2298 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2299 req.n.nlmsg_flags = NLM_F_REQUEST;
2300 if (cmd == RTM_NEWNEIGH)
2301 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2302 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
2303 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
2304 req.ndm.ndm_state = flags;
2305 req.ndm.ndm_ifindex = ifp->ifindex;
2306 req.ndm.ndm_type = RTN_UNICAST;
2307 req.ndm.ndm_flags = NTF_EXT_LEARNED;
2308
2309
2310 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2311 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2312 if (mac)
2313 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2314
2315 if (IS_ZEBRA_DEBUG_KERNEL)
2316 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s",
2317 nl_msg_type_to_str(cmd),
2318 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2319 ifp->ifindex, ipaddr2str(ip, buf, sizeof(buf)),
2320 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
2321 : "null");
2322
2323 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2324 0);
2325 }
2326
2327 int kernel_add_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2328 struct in_addr vtep_ip, u_char sticky)
2329 {
2330 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, 0, RTM_NEWNEIGH,
2331 sticky);
2332 }
2333
2334 int kernel_del_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2335 struct in_addr vtep_ip, int local)
2336 {
2337 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, local,
2338 RTM_DELNEIGH, 0);
2339 }
2340
2341 int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
2342 struct ethaddr *mac)
2343 {
2344 return netlink_neigh_update2(ifp, ip, mac, NUD_REACHABLE, RTM_NEWNEIGH);
2345 }
2346
2347 int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip)
2348 {
2349 return netlink_neigh_update2(ifp, ip, NULL, 0, RTM_DELNEIGH);
2350 }
2351
2352 /*
2353 * MPLS label forwarding table change via netlink interface.
2354 */
2355 int netlink_mpls_multipath(int cmd, zebra_lsp_t *lsp)
2356 {
2357 mpls_lse_t lse;
2358 zebra_nhlfe_t *nhlfe;
2359 struct nexthop *nexthop = NULL;
2360 unsigned int nexthop_num;
2361 const char *routedesc;
2362 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2363 int route_type;
2364
2365 struct {
2366 struct nlmsghdr n;
2367 struct rtmsg r;
2368 char buf[NL_PKT_BUF_SIZE];
2369 } req;
2370
2371 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
2372
2373
2374 /*
2375 * Count # nexthops so we can decide whether to use singlepath
2376 * or multipath case.
2377 */
2378 nexthop_num = 0;
2379 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2380 nexthop = nhlfe->nexthop;
2381 if (!nexthop)
2382 continue;
2383 if (cmd == RTM_NEWROUTE) {
2384 /* Count all selected NHLFEs */
2385 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2386 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2387 nexthop_num++;
2388 } else /* DEL */
2389 {
2390 /* Count all installed NHLFEs */
2391 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
2392 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
2393 nexthop_num++;
2394 }
2395 }
2396
2397 if (nexthop_num == 0 || !lsp->best_nhlfe) // unexpected
2398 return 0;
2399
2400 route_type = re_type_from_lsp_type(lsp->best_nhlfe->type);
2401
2402 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2403 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2404 req.n.nlmsg_type = cmd;
2405 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2406
2407 req.r.rtm_family = AF_MPLS;
2408 req.r.rtm_table = RT_TABLE_MAIN;
2409 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
2410 req.r.rtm_protocol = zebra2proto(route_type);
2411 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
2412 req.r.rtm_type = RTN_UNICAST;
2413
2414 if (cmd == RTM_NEWROUTE)
2415 /* We do a replace to handle update. */
2416 req.n.nlmsg_flags |= NLM_F_REPLACE;
2417
2418 /* Fill destination */
2419 lse = mpls_lse_encode(lsp->ile.in_label, 0, 0, 1);
2420 addattr_l(&req.n, sizeof req, RTA_DST, &lse, sizeof(mpls_lse_t));
2421
2422 /* Fill nexthops (paths) based on single-path or multipath. The paths
2423 * chosen depend on the operation.
2424 */
2425 if (nexthop_num == 1 || multipath_num == 1) {
2426 routedesc = "single-path";
2427 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2428
2429 nexthop_num = 0;
2430 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2431 nexthop = nhlfe->nexthop;
2432 if (!nexthop)
2433 continue;
2434
2435 if ((cmd == RTM_NEWROUTE
2436 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2437 && CHECK_FLAG(nexthop->flags,
2438 NEXTHOP_FLAG_ACTIVE)))
2439 || (cmd == RTM_DELROUTE
2440 && (CHECK_FLAG(nhlfe->flags,
2441 NHLFE_FLAG_INSTALLED)
2442 && CHECK_FLAG(nexthop->flags,
2443 NEXTHOP_FLAG_FIB)))) {
2444 /* Add the gateway */
2445 _netlink_mpls_build_singlepath(routedesc, nhlfe,
2446 &req.n, &req.r,
2447 sizeof req, cmd);
2448 if (cmd == RTM_NEWROUTE) {
2449 SET_FLAG(nhlfe->flags,
2450 NHLFE_FLAG_INSTALLED);
2451 SET_FLAG(nexthop->flags,
2452 NEXTHOP_FLAG_FIB);
2453 } else {
2454 UNSET_FLAG(nhlfe->flags,
2455 NHLFE_FLAG_INSTALLED);
2456 UNSET_FLAG(nexthop->flags,
2457 NEXTHOP_FLAG_FIB);
2458 }
2459 nexthop_num++;
2460 break;
2461 }
2462 }
2463 } else /* Multipath case */
2464 {
2465 char buf[NL_PKT_BUF_SIZE];
2466 struct rtattr *rta = (void *)buf;
2467 struct rtnexthop *rtnh;
2468 union g_addr *src1 = NULL;
2469
2470 rta->rta_type = RTA_MULTIPATH;
2471 rta->rta_len = RTA_LENGTH(0);
2472 rtnh = RTA_DATA(rta);
2473
2474 routedesc = "multipath";
2475 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2476
2477 nexthop_num = 0;
2478 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2479 nexthop = nhlfe->nexthop;
2480 if (!nexthop)
2481 continue;
2482
2483 if (nexthop_num >= multipath_num)
2484 break;
2485
2486 if ((cmd == RTM_NEWROUTE
2487 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2488 && CHECK_FLAG(nexthop->flags,
2489 NEXTHOP_FLAG_ACTIVE)))
2490 || (cmd == RTM_DELROUTE
2491 && (CHECK_FLAG(nhlfe->flags,
2492 NHLFE_FLAG_INSTALLED)
2493 && CHECK_FLAG(nexthop->flags,
2494 NEXTHOP_FLAG_FIB)))) {
2495 nexthop_num++;
2496
2497 /* Build the multipath */
2498 _netlink_mpls_build_multipath(routedesc, nhlfe,
2499 rta, rtnh, &req.r,
2500 &src1);
2501 rtnh = RTNH_NEXT(rtnh);
2502
2503 if (cmd == RTM_NEWROUTE) {
2504 SET_FLAG(nhlfe->flags,
2505 NHLFE_FLAG_INSTALLED);
2506 SET_FLAG(nexthop->flags,
2507 NEXTHOP_FLAG_FIB);
2508 } else {
2509 UNSET_FLAG(nhlfe->flags,
2510 NHLFE_FLAG_INSTALLED);
2511 UNSET_FLAG(nexthop->flags,
2512 NEXTHOP_FLAG_FIB);
2513 }
2514 }
2515 }
2516
2517 /* Add the multipath */
2518 if (rta->rta_len > RTA_LENGTH(0))
2519 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
2520 RTA_DATA(rta), RTA_PAYLOAD(rta));
2521 }
2522
2523 /* Talk to netlink socket. */
2524 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2525 0);
2526 }
2527
2528 /*
2529 * Handle failure in LSP install, clear flags for NHLFE.
2530 */
2531 void clear_nhlfe_installed(zebra_lsp_t *lsp)
2532 {
2533 zebra_nhlfe_t *nhlfe;
2534 struct nexthop *nexthop;
2535
2536 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2537 nexthop = nhlfe->nexthop;
2538 if (!nexthop)
2539 continue;
2540
2541 UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED);
2542 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
2543 }
2544 }
2545
2546 #endif /* HAVE_NETLINK */