]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
zebra: fix logging of MPLS labels
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 #include <net/if_arp.h>
26
27 /* Hack for GNU libc version 2. */
28 #ifndef MSG_TRUNC
29 #define MSG_TRUNC 0x20
30 #endif /* MSG_TRUNC */
31
32 #include "linklist.h"
33 #include "if.h"
34 #include "log.h"
35 #include "prefix.h"
36 #include "connected.h"
37 #include "table.h"
38 #include "memory.h"
39 #include "zebra_memory.h"
40 #include "rib.h"
41 #include "thread.h"
42 #include "privs.h"
43 #include "nexthop.h"
44 #include "vrf.h"
45 #include "vty.h"
46 #include "mpls.h"
47 #include "vxlan.h"
48
49 #include "zebra/zserv.h"
50 #include "zebra/zebra_ns.h"
51 #include "zebra/zebra_vrf.h"
52 #include "zebra/rt.h"
53 #include "zebra/redistribute.h"
54 #include "zebra/interface.h"
55 #include "zebra/debug.h"
56 #include "zebra/rtadv.h"
57 #include "zebra/zebra_ptm.h"
58 #include "zebra/zebra_mpls.h"
59 #include "zebra/kernel_netlink.h"
60 #include "zebra/rt_netlink.h"
61 #include "zebra/zebra_mroute.h"
62 #include "zebra/zebra_vxlan.h"
63
64
/* TODO - Temporary definitions, need to refine. */
/*
 * Fallback values for constants that this file uses. Each one is only
 * defined here when the headers included above did not already provide
 * it, so a definition from the system headers always takes precedence.
 */

/* Address family checked against rtm_family for MPLS routes. */
#ifndef AF_MPLS
#define AF_MPLS 28
#endif

/* Route attribute types (RTA_*) used when building/parsing route messages. */
#ifndef RTA_VIA
#define RTA_VIA 18
#endif

#ifndef RTA_NEWDST
#define RTA_NEWDST 19
#endif

#ifndef RTA_ENCAP_TYPE
#define RTA_ENCAP_TYPE 21
#endif

#ifndef RTA_ENCAP
#define RTA_ENCAP 22
#endif

#ifndef RTA_EXPIRES
#define RTA_EXPIRES 23
#endif

/* Encapsulation type and nested attribute used for MPLS label push. */
#ifndef LWTUNNEL_ENCAP_MPLS
#define LWTUNNEL_ENCAP_MPLS 1
#endif

#ifndef MPLS_IPTUNNEL_DST
#define MPLS_IPTUNNEL_DST 1
#endif

/*
 * NOTE(review): the NDA_ and NTF_ constants below are not referenced in the
 * part of the file visible here; presumably neighbor attribute types and
 * neighbor flags used further down -- confirm.
 */
#ifndef NDA_MASTER
#define NDA_MASTER 9
#endif

#ifndef NTF_MASTER
#define NTF_MASTER 0x04
#endif

#ifndef NTF_SELF
#define NTF_SELF 0x02
#endif

#ifndef NTF_EXT_LEARNED
#define NTF_EXT_LEARNED 0x10
#endif

#ifndef NDA_IFINDEX
#define NDA_IFINDEX 8
#endif

#ifndef NDA_VLAN
#define NDA_VLAN 5
#endif
/* End of temporary definitions */
122
/*
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably a VLAN filter consumed by code further down -- confirm.
 */
static vlanid_t filter_vlan = 0;

/*
 * Gateway address tagged with its address family.
 *
 * The gateway helpers in this file pass &family together with a length of
 * (address length + 2) as the RTA_VIA payload, i.e. they rely on 'gate'
 * directly following the 16-bit 'family' member.
 */
struct gw_family_t {
	/* Never read by the code visible here; presumably padding -- confirm. */
	u_int16_t filler;
	/* Address family of 'gate' (AF_INET or AF_INET6 in the callers). */
	u_int16_t family;
	/* The gateway address itself. */
	union g_addr gate;
};

/* Text form of the IPv4 address that rt_netlink_init() parses into ipv4_ll. */
char ipv4_ll_buf[16] = "169.254.0.1";
/* Binary form; used as RTA_GATEWAY for IPv4 routes that have IPv6 nexthops. */
struct in_addr ipv4_ll;
133
134 /*
135 * The ipv4_ll data structure is used for all 5549
136 * additions to the kernel. Let's figure out the
137 * correct value one time instead for every
138 * install/remove of a 5549 type route
139 */
140 void rt_netlink_init(void)
141 {
142 inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
143 }
144
145 static inline int is_selfroute(int proto)
146 {
147 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
148 || (proto == RTPROT_STATIC) || (proto == RTPROT_ZEBRA)
149 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
150 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
151 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
152 || (proto == RTPROT_RIP)) {
153 return 1;
154 }
155
156 return 0;
157 }
158
159 static inline int zebra2proto(int proto)
160 {
161 switch (proto) {
162 case ZEBRA_ROUTE_BABEL:
163 proto = RTPROT_BABEL;
164 break;
165 case ZEBRA_ROUTE_BGP:
166 proto = RTPROT_BGP;
167 break;
168 case ZEBRA_ROUTE_OSPF:
169 case ZEBRA_ROUTE_OSPF6:
170 proto = RTPROT_OSPF;
171 break;
172 case ZEBRA_ROUTE_STATIC:
173 proto = RTPROT_STATIC;
174 break;
175 case ZEBRA_ROUTE_ISIS:
176 proto = RTPROT_ISIS;
177 break;
178 case ZEBRA_ROUTE_RIP:
179 proto = RTPROT_RIP;
180 break;
181 case ZEBRA_ROUTE_RIPNG:
182 proto = RTPROT_RIPNG;
183 break;
184 case ZEBRA_ROUTE_NHRP:
185 proto = RTPROT_NHRP;
186 break;
187 case ZEBRA_ROUTE_EIGRP:
188 proto = RTPROT_EIGRP;
189 break;
190 case ZEBRA_ROUTE_LDP:
191 proto = RTPROT_LDP;
192 break;
193 default:
194 proto = RTPROT_ZEBRA;
195 break;
196 }
197
198 return proto;
199 }
200
201 static inline int proto2zebra(int proto, int family)
202 {
203 switch (proto) {
204 case RTPROT_BABEL:
205 proto = ZEBRA_ROUTE_BABEL;
206 break;
207 case RTPROT_BGP:
208 proto = ZEBRA_ROUTE_BGP;
209 break;
210 case RTPROT_OSPF:
211 proto = (family == AFI_IP) ?
212 ZEBRA_ROUTE_OSPF : ZEBRA_ROUTE_OSPF6;
213 break;
214 case RTPROT_ISIS:
215 proto = ZEBRA_ROUTE_ISIS;
216 break;
217 case RTPROT_RIP:
218 proto = ZEBRA_ROUTE_RIP;
219 break;
220 case RTPROT_RIPNG:
221 proto = ZEBRA_ROUTE_RIPNG;
222 break;
223 case RTPROT_NHRP:
224 proto = ZEBRA_ROUTE_NHRP;
225 break;
226 case RTPROT_EIGRP:
227 proto = ZEBRA_ROUTE_EIGRP;
228 break;
229 case RTPROT_LDP:
230 proto = ZEBRA_ROUTE_LDP;
231 break;
232 case RTPROT_STATIC:
233 proto = ZEBRA_ROUTE_STATIC;
234 break;
235 default:
236 proto = ZEBRA_ROUTE_KERNEL;
237 break;
238 }
239 return proto;
240 }
241
242 /*
243 Pending: create an efficient table_id (in a tree/hash) based lookup)
244 */
245 static vrf_id_t vrf_lookup_by_table(u_int32_t table_id)
246 {
247 struct vrf *vrf;
248 struct zebra_vrf *zvrf;
249
250 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
251 if ((zvrf = vrf->info) == NULL || (zvrf->table_id != table_id))
252 continue;
253
254 return zvrf_id(zvrf);
255 }
256
257 return VRF_DEFAULT;
258 }
259
260 /* Looking up routing table by netlink interface. */
261 static int netlink_route_change_read_unicast(struct sockaddr_nl *snl,
262 struct nlmsghdr *h, ns_id_t ns_id,
263 int startup)
264 {
265 int len;
266 struct rtmsg *rtm;
267 struct rtattr *tb[RTA_MAX + 1];
268 u_char flags = 0;
269 struct prefix p;
270 struct prefix_ipv6 src_p = {};
271 vrf_id_t vrf_id = VRF_DEFAULT;
272
273 char anyaddr[16] = {0};
274
275 int proto = ZEBRA_ROUTE_KERNEL;
276 int index = 0;
277 int table;
278 int metric = 0;
279 u_int32_t mtu = 0;
280 uint8_t distance = 0;
281
282 void *dest = NULL;
283 void *gate = NULL;
284 void *prefsrc = NULL; /* IPv4 preferred source host address */
285 void *src = NULL; /* IPv6 srcdest source prefix */
286 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
287
288 rtm = NLMSG_DATA(h);
289
290 if (startup && h->nlmsg_type != RTM_NEWROUTE)
291 return 0;
292 switch (rtm->rtm_type) {
293 case RTN_UNICAST:
294 break;
295 case RTN_BLACKHOLE:
296 bh_type = BLACKHOLE_NULL;
297 break;
298 case RTN_UNREACHABLE:
299 bh_type = BLACKHOLE_REJECT;
300 break;
301 case RTN_PROHIBIT:
302 bh_type = BLACKHOLE_ADMINPROHIB;
303 break;
304 default:
305 return 0;
306 }
307
308 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
309 if (len < 0)
310 return -1;
311
312 memset(tb, 0, sizeof tb);
313 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
314
315 if (rtm->rtm_flags & RTM_F_CLONED)
316 return 0;
317 if (rtm->rtm_protocol == RTPROT_REDIRECT)
318 return 0;
319 if (rtm->rtm_protocol == RTPROT_KERNEL)
320 return 0;
321
322 if (!startup && is_selfroute(rtm->rtm_protocol)
323 && h->nlmsg_type == RTM_NEWROUTE)
324 return 0;
325
326 /* We don't care about change notifications for the MPLS table. */
327 /* TODO: Revisit this. */
328 if (rtm->rtm_family == AF_MPLS)
329 return 0;
330
331 /* Table corresponding to route. */
332 if (tb[RTA_TABLE])
333 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
334 else
335 table = rtm->rtm_table;
336
337 /* Map to VRF */
338 vrf_id = vrf_lookup_by_table(table);
339 if (vrf_id == VRF_DEFAULT) {
340 if (!is_zebra_valid_kernel_table(table)
341 && !is_zebra_main_routing_table(table))
342 return 0;
343 }
344
345 /* Route which inserted by Zebra. */
346 if (is_selfroute(rtm->rtm_protocol)) {
347 flags |= ZEBRA_FLAG_SELFROUTE;
348 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
349 }
350 if (tb[RTA_OIF])
351 index = *(int *)RTA_DATA(tb[RTA_OIF]);
352
353 if (tb[RTA_DST])
354 dest = RTA_DATA(tb[RTA_DST]);
355 else
356 dest = anyaddr;
357
358 if (tb[RTA_SRC])
359 src = RTA_DATA(tb[RTA_SRC]);
360 else
361 src = anyaddr;
362
363 if (tb[RTA_PREFSRC])
364 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
365
366 if (tb[RTA_GATEWAY])
367 gate = RTA_DATA(tb[RTA_GATEWAY]);
368
369 if (tb[RTA_PRIORITY])
370 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
371
372 if (tb[RTA_METRICS]) {
373 struct rtattr *mxrta[RTAX_MAX + 1];
374
375 memset(mxrta, 0, sizeof mxrta);
376 netlink_parse_rtattr(mxrta, RTAX_MAX,
377 RTA_DATA(tb[RTA_METRICS]),
378 RTA_PAYLOAD(tb[RTA_METRICS]));
379
380 if (mxrta[RTAX_MTU])
381 mtu = *(u_int32_t *)RTA_DATA(mxrta[RTAX_MTU]);
382 }
383
384 if (rtm->rtm_family == AF_INET) {
385 p.family = AF_INET;
386 memcpy(&p.u.prefix4, dest, 4);
387 p.prefixlen = rtm->rtm_dst_len;
388
389 src_p.prefixlen =
390 0; // Forces debug below to not display anything
391 } else if (rtm->rtm_family == AF_INET6) {
392 p.family = AF_INET6;
393 memcpy(&p.u.prefix6, dest, 16);
394 p.prefixlen = rtm->rtm_dst_len;
395
396 src_p.family = AF_INET6;
397 memcpy(&src_p.prefix, src, 16);
398 src_p.prefixlen = rtm->rtm_src_len;
399 }
400
401 if (rtm->rtm_src_len != 0) {
402 char buf[PREFIX_STRLEN];
403 zlog_warn(
404 "unsupported IPv[4|6] sourcedest route (dest %s vrf %u)",
405 prefix2str(&p, buf, sizeof(buf)), vrf_id);
406 return 0;
407 }
408
409 /*
410 * For ZEBRA_ROUTE_KERNEL types:
411 *
412 * The metric/priority of the route received from the kernel
413 * is a 32 bit number. We are going to interpret the high
414 * order byte as the Admin Distance and the low order 3 bytes
415 * as the metric.
416 *
417 * This will allow us to do two things:
418 * 1) Allow the creation of kernel routes that can be
419 * overridden by zebra.
420 * 2) Allow the old behavior for 'most' kernel route types
421 * if a user enters 'ip route ...' v4 routes get a metric
422 * of 0 and v6 routes get a metric of 1024. Both of these
423 * values will end up with a admin distance of 0, which
424 * will cause them to win for the purposes of zebra.
425 */
426 if (proto == ZEBRA_ROUTE_KERNEL) {
427 distance = (metric >> 24) & 0xFF;
428 metric = (metric & 0x00FFFFFF);
429 }
430
431 if (IS_ZEBRA_DEBUG_KERNEL) {
432 char buf[PREFIX_STRLEN];
433 char buf2[PREFIX_STRLEN];
434 zlog_debug(
435 "%s %s%s%s vrf %u metric: %d Admin Distance: %d", nl_msg_type_to_str(h->nlmsg_type),
436 prefix2str(&p, buf, sizeof(buf)),
437 src_p.prefixlen ? " from " : "",
438 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
439 : "",
440 vrf_id, metric, distance);
441 }
442
443 afi_t afi = AFI_IP;
444 if (rtm->rtm_family == AF_INET6)
445 afi = AFI_IP6;
446
447 if (h->nlmsg_type == RTM_NEWROUTE) {
448 if (!tb[RTA_MULTIPATH]) {
449 struct nexthop nh;
450 size_t sz = (afi == AFI_IP) ? 4 : 16;
451
452 memset(&nh, 0, sizeof(nh));
453
454 if (bh_type == BLACKHOLE_UNSPEC) {
455 if (index && !gate)
456 nh.type = NEXTHOP_TYPE_IFINDEX;
457 else if (index && gate)
458 nh.type = (afi == AFI_IP)
459 ? NEXTHOP_TYPE_IPV4_IFINDEX
460 : NEXTHOP_TYPE_IPV6_IFINDEX;
461 else if (!index && gate)
462 nh.type = (afi == AFI_IP)
463 ? NEXTHOP_TYPE_IPV4
464 : NEXTHOP_TYPE_IPV6;
465 else {
466 nh.type = NEXTHOP_TYPE_BLACKHOLE;
467 nh.bh_type = bh_type;
468 }
469 } else {
470 nh.type = NEXTHOP_TYPE_BLACKHOLE;
471 nh.bh_type = bh_type;
472 }
473 nh.ifindex = index;
474 if (prefsrc)
475 memcpy(&nh.src, prefsrc, sz);
476 if (gate)
477 memcpy(&nh.gate, gate, sz);
478
479 rib_add(afi, SAFI_UNICAST, vrf_id, proto,
480 0, flags, &p, NULL, &nh, table, metric, mtu, distance);
481 } else {
482 /* This is a multipath route */
483
484 struct route_entry *re;
485 struct rtnexthop *rtnh =
486 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
487
488 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
489
490 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
491 re->type = proto;
492 re->distance = distance;
493 re->flags = flags;
494 re->metric = metric;
495 re->mtu = mtu;
496 re->vrf_id = vrf_id;
497 re->table = table;
498 re->nexthop_num = 0;
499 re->uptime = time(NULL);
500
501 for (;;) {
502 if (len < (int)sizeof(*rtnh)
503 || rtnh->rtnh_len > len)
504 break;
505
506 index = rtnh->rtnh_ifindex;
507 gate = 0;
508 if (rtnh->rtnh_len > sizeof(*rtnh)) {
509 memset(tb, 0, sizeof(tb));
510 netlink_parse_rtattr(
511 tb, RTA_MAX, RTNH_DATA(rtnh),
512 rtnh->rtnh_len - sizeof(*rtnh));
513 if (tb[RTA_GATEWAY])
514 gate = RTA_DATA(
515 tb[RTA_GATEWAY]);
516 }
517
518 if (gate) {
519 if (rtm->rtm_family == AF_INET) {
520 if (index)
521 route_entry_nexthop_ipv4_ifindex_add(
522 re, gate,
523 prefsrc, index);
524 else
525 route_entry_nexthop_ipv4_add(
526 re, gate,
527 prefsrc);
528 } else if (rtm->rtm_family
529 == AF_INET6) {
530 if (index)
531 route_entry_nexthop_ipv6_ifindex_add(
532 re, gate,
533 index);
534 else
535 route_entry_nexthop_ipv6_add(
536 re, gate);
537 }
538 } else
539 route_entry_nexthop_ifindex_add(re,
540 index);
541
542 len -= NLMSG_ALIGN(rtnh->rtnh_len);
543 rtnh = RTNH_NEXT(rtnh);
544 }
545
546 zserv_nexthop_num_warn(__func__,
547 (const struct prefix *)&p,
548 re->nexthop_num);
549 if (re->nexthop_num == 0)
550 XFREE(MTYPE_RE, re);
551 else
552 rib_add_multipath(afi, SAFI_UNICAST, &p,
553 NULL, re);
554 }
555 } else {
556 if (!tb[RTA_MULTIPATH]) {
557 struct nexthop nh;
558 size_t sz = (afi == AFI_IP) ? 4 : 16;
559
560 memset(&nh, 0, sizeof(nh));
561 if (bh_type == BLACKHOLE_UNSPEC) {
562 if (index && !gate)
563 nh.type = NEXTHOP_TYPE_IFINDEX;
564 else if (index && gate)
565 nh.type =
566 (afi == AFI_IP)
567 ? NEXTHOP_TYPE_IPV4_IFINDEX
568 : NEXTHOP_TYPE_IPV6_IFINDEX;
569 else if (!index && gate)
570 nh.type = (afi == AFI_IP)
571 ? NEXTHOP_TYPE_IPV4
572 : NEXTHOP_TYPE_IPV6;
573 else {
574 nh.type = NEXTHOP_TYPE_BLACKHOLE;
575 nh.bh_type = BLACKHOLE_UNSPEC;
576 }
577 } else {
578 nh.type = NEXTHOP_TYPE_BLACKHOLE;
579 nh.bh_type = bh_type;
580 }
581 nh.ifindex = index;
582 if (gate)
583 memcpy(&nh.gate, gate, sz);
584 rib_delete(afi, SAFI_UNICAST, vrf_id,
585 proto, 0, flags, &p, NULL, &nh,
586 table, metric, true);
587 } else {
588 /* XXX: need to compare the entire list of nexthops
589 * here for NLM_F_APPEND stupidity */
590 rib_delete(afi, SAFI_UNICAST, vrf_id,
591 proto, 0, flags, &p, NULL, NULL,
592 table, metric, true);
593 }
594 }
595
596 return 0;
597 }
598
599 static struct mcast_route_data *mroute = NULL;
600
601 static int netlink_route_change_read_multicast(struct sockaddr_nl *snl,
602 struct nlmsghdr *h,
603 ns_id_t ns_id, int startup)
604 {
605 int len;
606 struct rtmsg *rtm;
607 struct rtattr *tb[RTA_MAX + 1];
608 struct mcast_route_data *m;
609 struct mcast_route_data mr;
610 int iif = 0;
611 int count;
612 int oif[256];
613 int oif_count = 0;
614 char sbuf[40];
615 char gbuf[40];
616 char oif_list[256] = "\0";
617 vrf_id_t vrf = ns_id;
618 int table;
619
620 if (mroute)
621 m = mroute;
622 else {
623 memset(&mr, 0, sizeof(mr));
624 m = &mr;
625 }
626
627 rtm = NLMSG_DATA(h);
628
629 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
630
631 memset(tb, 0, sizeof tb);
632 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
633
634 if (tb[RTA_TABLE])
635 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
636 else
637 table = rtm->rtm_table;
638
639 vrf = vrf_lookup_by_table(table);
640
641 if (tb[RTA_IIF])
642 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
643
644 if (tb[RTA_SRC])
645 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
646
647 if (tb[RTA_DST])
648 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
649
650 if ((RTA_EXPIRES <= RTA_MAX) && tb[RTA_EXPIRES])
651 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
652
653 if (tb[RTA_MULTIPATH]) {
654 struct rtnexthop *rtnh =
655 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
656
657 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
658 for (;;) {
659 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
660 break;
661
662 oif[oif_count] = rtnh->rtnh_ifindex;
663 oif_count++;
664
665 len -= NLMSG_ALIGN(rtnh->rtnh_len);
666 rtnh = RTNH_NEXT(rtnh);
667 }
668 }
669
670 if (IS_ZEBRA_DEBUG_KERNEL) {
671 struct interface *ifp;
672 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
673 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
674 for (count = 0; count < oif_count; count++) {
675 ifp = if_lookup_by_index(oif[count], vrf);
676 char temp[256];
677
678 sprintf(temp, "%s ", ifp->name);
679 strcat(oif_list, temp);
680 }
681 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vrf);
682 ifp = if_lookup_by_index(iif, vrf);
683 zlog_debug(
684 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s OIF: %s jiffies: %lld",
685 zvrf->vrf->name, vrf, nl_msg_type_to_str(h->nlmsg_type),
686 sbuf, gbuf, ifp->name, oif_list, m->lastused);
687 }
688 return 0;
689 }
690
691 int netlink_route_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
692 ns_id_t ns_id, int startup)
693 {
694 int len;
695 vrf_id_t vrf_id = ns_id;
696 struct rtmsg *rtm;
697
698 rtm = NLMSG_DATA(h);
699
700 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
701 /* If this is not route add/delete message print warning. */
702 zlog_warn("Kernel message: %d vrf %u\n", h->nlmsg_type, vrf_id);
703 return 0;
704 }
705
706 /* Connected route. */
707 if (IS_ZEBRA_DEBUG_KERNEL)
708 zlog_debug("%s %s %s proto %s vrf %u",
709 nl_msg_type_to_str(h->nlmsg_type),
710 nl_family_to_str(rtm->rtm_family),
711 nl_rttype_to_str(rtm->rtm_type),
712 nl_rtproto_to_str(rtm->rtm_protocol), vrf_id);
713
714 /* We don't care about change notifications for the MPLS table. */
715 /* TODO: Revisit this. */
716 if (rtm->rtm_family == AF_MPLS)
717 return 0;
718
719 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
720 if (len < 0)
721 return -1;
722
723 if (rtm->rtm_type == RTN_MULTICAST)
724 netlink_route_change_read_multicast(snl, h, ns_id, startup);
725 else
726 netlink_route_change_read_unicast(snl, h, ns_id, startup);
727 return 0;
728 }
729
730 /* Request for specific route information from the kernel */
731 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
732 {
733 struct {
734 struct nlmsghdr n;
735 struct rtmsg rtm;
736 } req;
737
738 /* Form the request, specifying filter (rtattr) if needed. */
739 memset(&req, 0, sizeof(req));
740 req.n.nlmsg_type = type;
741 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
742 req.rtm.rtm_family = family;
743
744 return netlink_request(&zns->netlink_cmd, &req.n);
745 }
746
747 /* Routing table read function using netlink interface. Only called
748 bootstrap time. */
749 int netlink_route_read(struct zebra_ns *zns)
750 {
751 int ret;
752
753 /* Get IPv4 routing table. */
754 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
755 if (ret < 0)
756 return ret;
757 ret = netlink_parse_info(netlink_route_change_read_unicast,
758 &zns->netlink_cmd, zns, 0, 1);
759 if (ret < 0)
760 return ret;
761
762 /* Get IPv6 routing table. */
763 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
764 if (ret < 0)
765 return ret;
766 ret = netlink_parse_info(netlink_route_change_read_unicast,
767 &zns->netlink_cmd, zns, 0, 1);
768 if (ret < 0)
769 return ret;
770
771 return 0;
772 }
773
774 static void _netlink_route_nl_add_gateway_info(u_char route_family,
775 u_char gw_family,
776 struct nlmsghdr *nlmsg,
777 size_t req_size, int bytelen,
778 struct nexthop *nexthop)
779 {
780 if (route_family == AF_MPLS) {
781 struct gw_family_t gw_fam;
782
783 gw_fam.family = gw_family;
784 if (gw_family == AF_INET)
785 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
786 else
787 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
788 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
789 bytelen + 2);
790 } else {
791 if (gw_family == AF_INET)
792 addattr_l(nlmsg, req_size, RTA_GATEWAY,
793 &nexthop->gate.ipv4, bytelen);
794 else
795 addattr_l(nlmsg, req_size, RTA_GATEWAY,
796 &nexthop->gate.ipv6, bytelen);
797 }
798 }
799
800 static void _netlink_route_rta_add_gateway_info(u_char route_family,
801 u_char gw_family,
802 struct rtattr *rta,
803 struct rtnexthop *rtnh,
804 size_t req_size, int bytelen,
805 struct nexthop *nexthop)
806 {
807 if (route_family == AF_MPLS) {
808 struct gw_family_t gw_fam;
809
810 gw_fam.family = gw_family;
811 if (gw_family == AF_INET)
812 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
813 else
814 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
815 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
816 bytelen + 2);
817 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
818 } else {
819 if (gw_family == AF_INET)
820 rta_addattr_l(rta, req_size, RTA_GATEWAY,
821 &nexthop->gate.ipv4, bytelen);
822 else
823 rta_addattr_l(rta, req_size, RTA_GATEWAY,
824 &nexthop->gate.ipv6, bytelen);
825 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
826 }
827 }
828
829 /* This function takes a nexthop as argument and adds
830 * the appropriate netlink attributes to an existing
831 * netlink message.
832 *
833 * @param routedesc: Human readable description of route type
834 * (direct/recursive, single-/multipath)
835 * @param bytelen: Length of addresses in bytes.
836 * @param nexthop: Nexthop information
837 * @param nlmsg: nlmsghdr structure to fill in.
838 * @param req_size: The size allocated for the message.
839 */
840 static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
841 struct nexthop *nexthop,
842 struct nlmsghdr *nlmsg,
843 struct rtmsg *rtmsg,
844 size_t req_size, int cmd)
845 {
846 struct nexthop_label *nh_label;
847 mpls_lse_t out_lse[MPLS_MAX_LABELS];
848 char label_buf[256];
849
850 /*
851 * label_buf is *only* currently used within debugging.
852 * As such when we assign it we are guarding it inside
853 * a debug test. If you want to change this make sure
854 * you fix this assumption
855 */
856 label_buf[0] = '\0';
857 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
858 * (in the case of LER)
859 */
860 nh_label = nexthop->nh_label;
861 if (rtmsg->rtm_family == AF_MPLS) {
862 assert(nh_label);
863 assert(nh_label->num_labels == 1);
864 }
865
866 if (nh_label && nh_label->num_labels) {
867 int i, num_labels = 0;
868 u_int32_t bos;
869 char label_buf1[20];
870
871 for (i = 0; i < nh_label->num_labels; i++) {
872 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
873 bos = ((i == (nh_label->num_labels - 1)) ? 1
874 : 0);
875 out_lse[i] = mpls_lse_encode(nh_label->label[i],
876 0, 0, bos);
877 if (IS_ZEBRA_DEBUG_KERNEL) {
878 if (!num_labels)
879 sprintf(label_buf, "label %u",
880 nh_label->label[i]);
881 else {
882 sprintf(label_buf1, "/%u",
883 nh_label->label[i]);
884 strlcat(label_buf, label_buf1,
885 sizeof(label_buf));
886 }
887 }
888 num_labels++;
889 }
890 }
891 if (num_labels) {
892 if (rtmsg->rtm_family == AF_MPLS)
893 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
894 num_labels * sizeof(mpls_lse_t));
895 else {
896 struct rtattr *nest;
897 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
898
899 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE,
900 &encap, sizeof(u_int16_t));
901 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
902 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST,
903 &out_lse,
904 num_labels * sizeof(mpls_lse_t));
905 addattr_nest_end(nlmsg, nest);
906 }
907 }
908 }
909
910 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
911 rtmsg->rtm_flags |= RTNH_F_ONLINK;
912
913 if (rtmsg->rtm_family == AF_INET
914 && (nexthop->type == NEXTHOP_TYPE_IPV6
915 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
916 rtmsg->rtm_flags |= RTNH_F_ONLINK;
917 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
918 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
919
920 if (nexthop->rmap_src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
921 addattr_l(nlmsg, req_size, RTA_PREFSRC,
922 &nexthop->rmap_src.ipv4, bytelen);
923 else if (nexthop->src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
924 addattr_l(nlmsg, req_size, RTA_PREFSRC,
925 &nexthop->src.ipv4, bytelen);
926
927 if (IS_ZEBRA_DEBUG_KERNEL)
928 zlog_debug(
929 " 5549: _netlink_route_build_singlepath() (%s): "
930 "nexthop via %s %s if %u",
931 routedesc, ipv4_ll_buf, label_buf,
932 nexthop->ifindex);
933 return;
934 }
935
936 if (nexthop->type == NEXTHOP_TYPE_IPV4
937 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
938 /* Send deletes to the kernel without specifying the next-hop */
939 if (cmd != RTM_DELROUTE)
940 _netlink_route_nl_add_gateway_info(
941 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
942 bytelen, nexthop);
943
944 if (cmd == RTM_NEWROUTE) {
945 if (nexthop->rmap_src.ipv4.s_addr)
946 addattr_l(nlmsg, req_size, RTA_PREFSRC,
947 &nexthop->rmap_src.ipv4, bytelen);
948 else if (nexthop->src.ipv4.s_addr)
949 addattr_l(nlmsg, req_size, RTA_PREFSRC,
950 &nexthop->src.ipv4, bytelen);
951 }
952
953 if (IS_ZEBRA_DEBUG_KERNEL)
954 zlog_debug(
955 "netlink_route_multipath() (%s): "
956 "nexthop via %s %s if %u",
957 routedesc, inet_ntoa(nexthop->gate.ipv4),
958 label_buf, nexthop->ifindex);
959 }
960
961 if (nexthop->type == NEXTHOP_TYPE_IPV6
962 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
963 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
964 nlmsg, req_size, bytelen,
965 nexthop);
966
967 if (cmd == RTM_NEWROUTE) {
968 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
969 addattr_l(nlmsg, req_size, RTA_PREFSRC,
970 &nexthop->rmap_src.ipv6, bytelen);
971 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
972 addattr_l(nlmsg, req_size, RTA_PREFSRC,
973 &nexthop->src.ipv6, bytelen);
974 }
975
976 if (IS_ZEBRA_DEBUG_KERNEL)
977 zlog_debug(
978 "netlink_route_multipath() (%s): "
979 "nexthop via %s %s if %u",
980 routedesc, inet6_ntoa(nexthop->gate.ipv6),
981 label_buf, nexthop->ifindex);
982 }
983 if (nexthop->type == NEXTHOP_TYPE_IFINDEX
984 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
985 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
986
987 if (cmd == RTM_NEWROUTE) {
988 if (nexthop->rmap_src.ipv4.s_addr)
989 addattr_l(nlmsg, req_size, RTA_PREFSRC,
990 &nexthop->rmap_src.ipv4, bytelen);
991 else if (nexthop->src.ipv4.s_addr)
992 addattr_l(nlmsg, req_size, RTA_PREFSRC,
993 &nexthop->src.ipv4, bytelen);
994 }
995
996 if (IS_ZEBRA_DEBUG_KERNEL)
997 zlog_debug(
998 "netlink_route_multipath() (%s): "
999 "nexthop via if %u",
1000 routedesc, nexthop->ifindex);
1001 }
1002
1003 if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1004 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1005
1006 if (cmd == RTM_NEWROUTE) {
1007 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1008 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1009 &nexthop->rmap_src.ipv6, bytelen);
1010 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1011 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1012 &nexthop->src.ipv6, bytelen);
1013 }
1014
1015 if (IS_ZEBRA_DEBUG_KERNEL)
1016 zlog_debug(
1017 "netlink_route_multipath() (%s): "
1018 "nexthop via if %u",
1019 routedesc, nexthop->ifindex);
1020 }
1021 }
1022
1023 /* This function takes a nexthop as argument and
1024 * appends to the given rtattr/rtnexthop pair the
1025 * representation of the nexthop. If the nexthop
1026 * defines a preferred source, the src parameter
1027 * will be modified to point to that src, otherwise
1028 * it will be kept unmodified.
1029 *
1030 * @param routedesc: Human readable description of route type
1031 * (direct/recursive, single-/multipath)
1032 * @param bytelen: Length of addresses in bytes.
1033 * @param nexthop: Nexthop information
1034 * @param rta: rtnetlink attribute structure
1035 * @param rtnh: pointer to an rtnetlink nexthop structure
1036 * @param src: pointer pointing to a location where
1037 * the prefsrc should be stored.
1038 */
1039 static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
1040 struct nexthop *nexthop,
1041 struct rtattr *rta,
1042 struct rtnexthop *rtnh,
1043 struct rtmsg *rtmsg,
1044 union g_addr **src)
1045 {
1046 struct nexthop_label *nh_label;
1047 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1048 char label_buf[256];
1049
1050 rtnh->rtnh_len = sizeof(*rtnh);
1051 rtnh->rtnh_flags = 0;
1052 rtnh->rtnh_hops = 0;
1053 rta->rta_len += rtnh->rtnh_len;
1054
1055 /*
1056 * label_buf is *only* currently used within debugging.
1057 * As such when we assign it we are guarding it inside
1058 * a debug test. If you want to change this make sure
1059 * you fix this assumption
1060 */
1061 label_buf[0] = '\0';
1062 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
1063 * (in the case of LER)
1064 */
1065 nh_label = nexthop->nh_label;
1066 if (rtmsg->rtm_family == AF_MPLS) {
1067 assert(nh_label);
1068 assert(nh_label->num_labels == 1);
1069 }
1070
1071 if (nh_label && nh_label->num_labels) {
1072 int i, num_labels = 0;
1073 u_int32_t bos;
1074 char label_buf1[20];
1075
1076 for (i = 0; i < nh_label->num_labels; i++) {
1077 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
1078 bos = ((i == (nh_label->num_labels - 1)) ? 1
1079 : 0);
1080 out_lse[i] = mpls_lse_encode(nh_label->label[i],
1081 0, 0, bos);
1082 if (IS_ZEBRA_DEBUG_KERNEL) {
1083 if (!num_labels)
1084 sprintf(label_buf, "label %u",
1085 nh_label->label[i]);
1086 else {
1087 sprintf(label_buf1, "/%u",
1088 nh_label->label[i]);
1089 strlcat(label_buf, label_buf1,
1090 sizeof(label_buf));
1091 }
1092 }
1093 num_labels++;
1094 }
1095 }
1096 if (num_labels) {
1097 if (rtmsg->rtm_family == AF_MPLS) {
1098 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1099 &out_lse,
1100 num_labels * sizeof(mpls_lse_t));
1101 rtnh->rtnh_len += RTA_LENGTH(
1102 num_labels * sizeof(mpls_lse_t));
1103 } else {
1104 struct rtattr *nest;
1105 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
1106 int len = rta->rta_len;
1107
1108 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1109 RTA_ENCAP_TYPE, &encap,
1110 sizeof(u_int16_t));
1111 nest = rta_nest(rta, NL_PKT_BUF_SIZE,
1112 RTA_ENCAP);
1113 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1114 MPLS_IPTUNNEL_DST, &out_lse,
1115 num_labels * sizeof(mpls_lse_t));
1116 rta_nest_end(rta, nest);
1117 rtnh->rtnh_len += rta->rta_len - len;
1118 }
1119 }
1120 }
1121
1122 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1123 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1124
1125 if (rtmsg->rtm_family == AF_INET
1126 && (nexthop->type == NEXTHOP_TYPE_IPV6
1127 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1128 bytelen = 4;
1129 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1130 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1131 bytelen);
1132 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1133 rtnh->rtnh_ifindex = nexthop->ifindex;
1134
1135 if (nexthop->rmap_src.ipv4.s_addr)
1136 *src = &nexthop->rmap_src;
1137 else if (nexthop->src.ipv4.s_addr)
1138 *src = &nexthop->src;
1139
1140 if (IS_ZEBRA_DEBUG_KERNEL)
1141 zlog_debug(
1142 " 5549: netlink_route_build_multipath() (%s): "
1143 "nexthop via %s %s if %u",
1144 routedesc, ipv4_ll_buf, label_buf,
1145 nexthop->ifindex);
1146 return;
1147 }
1148
1149 if (nexthop->type == NEXTHOP_TYPE_IPV4
1150 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1151 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1152 rta, rtnh, NL_PKT_BUF_SIZE,
1153 bytelen, nexthop);
1154 if (nexthop->rmap_src.ipv4.s_addr)
1155 *src = &nexthop->rmap_src;
1156 else if (nexthop->src.ipv4.s_addr)
1157 *src = &nexthop->src;
1158
1159 if (IS_ZEBRA_DEBUG_KERNEL)
1160 zlog_debug(
1161 "netlink_route_multipath() (%s): "
1162 "nexthop via %s %s if %u",
1163 routedesc, inet_ntoa(nexthop->gate.ipv4),
1164 label_buf, nexthop->ifindex);
1165 }
1166 if (nexthop->type == NEXTHOP_TYPE_IPV6
1167 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1168 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1169 rta, rtnh, NL_PKT_BUF_SIZE,
1170 bytelen, nexthop);
1171
1172 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1173 *src = &nexthop->rmap_src;
1174 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1175 *src = &nexthop->src;
1176
1177 if (IS_ZEBRA_DEBUG_KERNEL)
1178 zlog_debug(
1179 "netlink_route_multipath() (%s): "
1180 "nexthop via %s %s if %u",
1181 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1182 label_buf, nexthop->ifindex);
1183 }
1184 /* ifindex */
1185 if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
1186 || nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1187 rtnh->rtnh_ifindex = nexthop->ifindex;
1188
1189 if (nexthop->rmap_src.ipv4.s_addr)
1190 *src = &nexthop->rmap_src;
1191 else if (nexthop->src.ipv4.s_addr)
1192 *src = &nexthop->src;
1193
1194 if (IS_ZEBRA_DEBUG_KERNEL)
1195 zlog_debug(
1196 "netlink_route_multipath() (%s): "
1197 "nexthop via if %u",
1198 routedesc, nexthop->ifindex);
1199 } else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1200 rtnh->rtnh_ifindex = nexthop->ifindex;
1201
1202 if (IS_ZEBRA_DEBUG_KERNEL)
1203 zlog_debug(
1204 "netlink_route_multipath() (%s): "
1205 "nexthop via if %u",
1206 routedesc, nexthop->ifindex);
1207 } else {
1208 rtnh->rtnh_ifindex = 0;
1209 }
1210 }
1211
1212 static inline void _netlink_mpls_build_singlepath(const char *routedesc,
1213 zebra_nhlfe_t *nhlfe,
1214 struct nlmsghdr *nlmsg,
1215 struct rtmsg *rtmsg,
1216 size_t req_size, int cmd)
1217 {
1218 int bytelen;
1219 u_char family;
1220
1221 family = NHLFE_FAMILY(nhlfe);
1222 bytelen = (family == AF_INET ? 4 : 16);
1223 _netlink_route_build_singlepath(routedesc, bytelen, nhlfe->nexthop,
1224 nlmsg, rtmsg, req_size, cmd);
1225 }
1226
1227
1228 static inline void
1229 _netlink_mpls_build_multipath(const char *routedesc, zebra_nhlfe_t *nhlfe,
1230 struct rtattr *rta, struct rtnexthop *rtnh,
1231 struct rtmsg *rtmsg, union g_addr **src)
1232 {
1233 int bytelen;
1234 u_char family;
1235
1236 family = NHLFE_FAMILY(nhlfe);
1237 bytelen = (family == AF_INET ? 4 : 16);
1238 _netlink_route_build_multipath(routedesc, bytelen, nhlfe->nexthop, rta,
1239 rtnh, rtmsg, src);
1240 }
1241
1242
1243 /* Log debug information for netlink_route_multipath
1244 * if debug logging is enabled.
1245 *
1246 * @param cmd: Netlink command which is to be processed
1247 * @param p: Prefix for which the change is due
1248 * @param nexthop: Nexthop which is currently processed
1249 * @param routedesc: Semantic annotation for nexthop
1250 * (recursive, multipath, etc.)
1251 * @param family: Address family which the change concerns
1252 */
1253 static void _netlink_route_debug(int cmd, struct prefix *p,
1254 struct nexthop *nexthop, const char *routedesc,
1255 int family, struct zebra_vrf *zvrf)
1256 {
1257 if (IS_ZEBRA_DEBUG_KERNEL) {
1258 char buf[PREFIX_STRLEN];
1259 zlog_debug(
1260 "netlink_route_multipath() (%s): %s %s vrf %u type %s",
1261 routedesc, nl_msg_type_to_str(cmd),
1262 prefix2str(p, buf, sizeof(buf)), zvrf_id(zvrf),
1263 (nexthop) ? nexthop_type_to_str(nexthop->type) : "UNK");
1264 }
1265 }
1266
1267 static void _netlink_mpls_debug(int cmd, u_int32_t label, const char *routedesc)
1268 {
1269 if (IS_ZEBRA_DEBUG_KERNEL)
1270 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1271 nl_msg_type_to_str(cmd), label);
1272 }
1273
1274 static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
1275 int llalen)
1276 {
1277 struct {
1278 struct nlmsghdr n;
1279 struct ndmsg ndm;
1280 char buf[256];
1281 } req;
1282
1283 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1284
1285 memset(&req.n, 0, sizeof(req.n));
1286 memset(&req.ndm, 0, sizeof(req.ndm));
1287
1288 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1289 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1290 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1291 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1292
1293 req.ndm.ndm_family = AF_INET;
1294 req.ndm.ndm_state = NUD_PERMANENT;
1295 req.ndm.ndm_ifindex = ifindex;
1296 req.ndm.ndm_type = RTN_UNICAST;
1297
1298 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1299 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1300
1301 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1302 0);
1303 }
1304
1305 /* Routing table change via netlink interface. */
1306 /* Update flag indicates whether this is a "replace" or not. */
1307 static int netlink_route_multipath(int cmd, struct prefix *p,
1308 struct prefix *src_p, struct route_entry *re,
1309 int update)
1310 {
1311 int bytelen;
1312 struct sockaddr_nl snl;
1313 struct nexthop *nexthop = NULL;
1314 unsigned int nexthop_num;
1315 int discard = 0;
1316 int family = PREFIX_FAMILY(p);
1317 const char *routedesc;
1318 int setsrc = 0;
1319 union g_addr src;
1320
1321 struct {
1322 struct nlmsghdr n;
1323 struct rtmsg r;
1324 char buf[NL_PKT_BUF_SIZE];
1325 } req;
1326
1327 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1328 struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
1329
1330 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
1331
1332 bytelen = (family == AF_INET ? 4 : 16);
1333
1334 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1335 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1336 if ((cmd == RTM_NEWROUTE) && update)
1337 req.n.nlmsg_flags |= NLM_F_REPLACE;
1338 req.n.nlmsg_type = cmd;
1339 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1340
1341 req.r.rtm_family = family;
1342 req.r.rtm_dst_len = p->prefixlen;
1343 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1344 req.r.rtm_protocol = zebra2proto(re->type);
1345 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1346 req.r.rtm_type = RTN_UNICAST;
1347
1348 addattr_l(&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
1349 if (src_p)
1350 addattr_l(&req.n, sizeof req, RTA_SRC, &src_p->u.prefix,
1351 bytelen);
1352
1353 /* Metric. */
1354 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1355 * used
1356 * either by the kernel or by zebra. Its purely for calculating best
1357 * path(s)
1358 * by the routing protocol and for communicating with protocol peers.
1359 */
1360 addattr32(&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
1361
1362 /* Table corresponding to this route. */
1363 if (re->table < 256)
1364 req.r.rtm_table = re->table;
1365 else {
1366 req.r.rtm_table = RT_TABLE_UNSPEC;
1367 addattr32(&req.n, sizeof req, RTA_TABLE, re->table);
1368 }
1369
1370 if (discard)
1371 goto skip;
1372
1373 if (re->mtu || re->nexthop_mtu) {
1374 char buf[NL_PKT_BUF_SIZE];
1375 struct rtattr *rta = (void *)buf;
1376 u_int32_t mtu = re->mtu;
1377 if (!mtu || (re->nexthop_mtu && re->nexthop_mtu < mtu))
1378 mtu = re->nexthop_mtu;
1379 rta->rta_type = RTA_METRICS;
1380 rta->rta_len = RTA_LENGTH(0);
1381 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
1382 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1383 RTA_PAYLOAD(rta));
1384 }
1385
1386 /* Count overall nexthops so we can decide whether to use singlepath
1387 * or multipath case. */
1388 nexthop_num = 0;
1389 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1390 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1391 continue;
1392 if (cmd == RTM_NEWROUTE
1393 && !NEXTHOP_IS_ACTIVE(nexthop->flags))
1394 continue;
1395 if (cmd == RTM_DELROUTE
1396 && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
1397 continue;
1398
1399 nexthop_num++;
1400 }
1401
1402 /* Singlepath case. */
1403 if (nexthop_num == 1 || multipath_num == 1) {
1404 nexthop_num = 0;
1405 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1406 /*
1407 * So we want to cover 2 types of blackhole
1408 * routes here:
1409 * 1) A normal blackhole route( ala from a static
1410 * install.
1411 * 2) A recursively resolved blackhole route
1412 */
1413 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1414 switch (nexthop->bh_type) {
1415 case BLACKHOLE_ADMINPROHIB:
1416 req.r.rtm_type = RTN_PROHIBIT;
1417 break;
1418 case BLACKHOLE_REJECT:
1419 req.r.rtm_type = RTN_UNREACHABLE;
1420 break;
1421 default:
1422 req.r.rtm_type = RTN_BLACKHOLE;
1423 break;
1424 }
1425 goto skip;
1426 }
1427 if (CHECK_FLAG(nexthop->flags,
1428 NEXTHOP_FLAG_RECURSIVE)) {
1429 if (!setsrc) {
1430 if (family == AF_INET) {
1431 if (nexthop->rmap_src.ipv4
1432 .s_addr
1433 != 0) {
1434 src.ipv4 =
1435 nexthop->rmap_src
1436 .ipv4;
1437 setsrc = 1;
1438 } else if (nexthop->src.ipv4
1439 .s_addr
1440 != 0) {
1441 src.ipv4 =
1442 nexthop->src
1443 .ipv4;
1444 setsrc = 1;
1445 }
1446 } else if (family == AF_INET6) {
1447 if (!IN6_IS_ADDR_UNSPECIFIED(
1448 &nexthop->rmap_src
1449 .ipv6)) {
1450 src.ipv6 =
1451 nexthop->rmap_src
1452 .ipv6;
1453 setsrc = 1;
1454 } else if (
1455 !IN6_IS_ADDR_UNSPECIFIED(
1456 &nexthop->src
1457 .ipv6)) {
1458 src.ipv6 =
1459 nexthop->src
1460 .ipv6;
1461 setsrc = 1;
1462 }
1463 }
1464 }
1465 continue;
1466 }
1467
1468 if ((cmd == RTM_NEWROUTE
1469 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1470 || (cmd == RTM_DELROUTE
1471 && CHECK_FLAG(nexthop->flags,
1472 NEXTHOP_FLAG_FIB))) {
1473 routedesc = nexthop->rparent
1474 ? "recursive, single-path"
1475 : "single-path";
1476
1477 _netlink_route_debug(cmd, p, nexthop, routedesc,
1478 family, zvrf);
1479 _netlink_route_build_singlepath(
1480 routedesc, bytelen, nexthop, &req.n,
1481 &req.r, sizeof req, cmd);
1482 nexthop_num++;
1483 break;
1484 }
1485 }
1486 if (setsrc && (cmd == RTM_NEWROUTE)) {
1487 if (family == AF_INET)
1488 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1489 &src.ipv4, bytelen);
1490 else if (family == AF_INET6)
1491 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1492 &src.ipv6, bytelen);
1493 }
1494 } else {
1495 char buf[NL_PKT_BUF_SIZE];
1496 struct rtattr *rta = (void *)buf;
1497 struct rtnexthop *rtnh;
1498 union g_addr *src1 = NULL;
1499
1500 rta->rta_type = RTA_MULTIPATH;
1501 rta->rta_len = RTA_LENGTH(0);
1502 rtnh = RTA_DATA(rta);
1503
1504 nexthop_num = 0;
1505 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1506 if (nexthop_num >= multipath_num)
1507 break;
1508
1509 if (CHECK_FLAG(nexthop->flags,
1510 NEXTHOP_FLAG_RECURSIVE)) {
1511 /* This only works for IPv4 now */
1512 if (!setsrc) {
1513 if (family == AF_INET) {
1514 if (nexthop->rmap_src.ipv4
1515 .s_addr
1516 != 0) {
1517 src.ipv4 =
1518 nexthop->rmap_src
1519 .ipv4;
1520 setsrc = 1;
1521 } else if (nexthop->src.ipv4
1522 .s_addr
1523 != 0) {
1524 src.ipv4 =
1525 nexthop->src
1526 .ipv4;
1527 setsrc = 1;
1528 }
1529 } else if (family == AF_INET6) {
1530 if (!IN6_IS_ADDR_UNSPECIFIED(
1531 &nexthop->rmap_src
1532 .ipv6)) {
1533 src.ipv6 =
1534 nexthop->rmap_src
1535 .ipv6;
1536 setsrc = 1;
1537 } else if (
1538 !IN6_IS_ADDR_UNSPECIFIED(
1539 &nexthop->src
1540 .ipv6)) {
1541 src.ipv6 =
1542 nexthop->src
1543 .ipv6;
1544 setsrc = 1;
1545 }
1546 }
1547 }
1548 continue;
1549 }
1550
1551 if ((cmd == RTM_NEWROUTE
1552 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1553 || (cmd == RTM_DELROUTE
1554 && CHECK_FLAG(nexthop->flags,
1555 NEXTHOP_FLAG_FIB))) {
1556 routedesc = nexthop->rparent
1557 ? "recursive, multipath"
1558 : "multipath";
1559 nexthop_num++;
1560
1561 _netlink_route_debug(cmd, p, nexthop, routedesc,
1562 family, zvrf);
1563 _netlink_route_build_multipath(
1564 routedesc, bytelen, nexthop, rta, rtnh,
1565 &req.r, &src1);
1566 rtnh = RTNH_NEXT(rtnh);
1567
1568 if (!setsrc && src1) {
1569 if (family == AF_INET)
1570 src.ipv4 = src1->ipv4;
1571 else if (family == AF_INET6)
1572 src.ipv6 = src1->ipv6;
1573
1574 setsrc = 1;
1575 }
1576 }
1577 }
1578 if (setsrc && (cmd == RTM_NEWROUTE)) {
1579 if (family == AF_INET)
1580 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1581 &src.ipv4, bytelen);
1582 else if (family == AF_INET6)
1583 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1584 &src.ipv6, bytelen);
1585 if (IS_ZEBRA_DEBUG_KERNEL)
1586 zlog_debug("Setting source");
1587 }
1588
1589 if (rta->rta_len > RTA_LENGTH(0))
1590 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1591 RTA_DATA(rta), RTA_PAYLOAD(rta));
1592 }
1593
1594 /* If there is no useful nexthop then return. */
1595 if (nexthop_num == 0) {
1596 if (IS_ZEBRA_DEBUG_KERNEL)
1597 zlog_debug(
1598 "netlink_route_multipath(): No useful nexthop.");
1599 return 0;
1600 }
1601
1602 skip:
1603
1604 /* Destination netlink address. */
1605 memset(&snl, 0, sizeof snl);
1606 snl.nl_family = AF_NETLINK;
1607
1608 /* Talk to netlink socket. */
1609 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1610 0);
1611 }
1612
1613 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
1614 {
1615 int suc = 0;
1616 struct mcast_route_data *mr = (struct mcast_route_data *)in;
1617 struct {
1618 struct nlmsghdr n;
1619 struct ndmsg ndm;
1620 char buf[256];
1621 } req;
1622
1623 mroute = mr;
1624 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1625
1626 memset(&req.n, 0, sizeof(req.n));
1627 memset(&req.ndm, 0, sizeof(req.ndm));
1628
1629 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1630 req.n.nlmsg_flags = NLM_F_REQUEST;
1631 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1632
1633 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1634 req.n.nlmsg_type = RTM_GETROUTE;
1635
1636 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1637 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1638 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1639 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
1640 addattr_l(&req.n, sizeof(req), RTA_TABLE, &zvrf->table_id, 4);
1641
1642 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1643 &zns->netlink_cmd, zns, 0);
1644
1645 mroute = NULL;
1646 return suc;
1647 }
1648
1649 int kernel_route_rib(struct prefix *p, struct prefix *src_p,
1650 struct route_entry *old, struct route_entry *new)
1651 {
1652 assert(old || new);
1653
1654 if (!old && new)
1655 return netlink_route_multipath(RTM_NEWROUTE, p, src_p, new, 0);
1656 if (old && !new)
1657 return netlink_route_multipath(RTM_DELROUTE, p, src_p, old, 0);
1658
1659 return netlink_route_multipath(RTM_NEWROUTE, p, src_p, new, 1);
1660 }
1661
1662 int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
1663 int llalen)
1664 {
1665 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
1666 addr, lla, llalen);
1667 }
1668
1669 /*
1670 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
1671 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
1672 */
1673 static int netlink_vxlan_flood_list_update(struct interface *ifp,
1674 struct in_addr *vtep_ip, int cmd)
1675 {
1676 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1677 struct {
1678 struct nlmsghdr n;
1679 struct ndmsg ndm;
1680 char buf[256];
1681 } req;
1682 u_char dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1683
1684 memset(&req.n, 0, sizeof(req.n));
1685 memset(&req.ndm, 0, sizeof(req.ndm));
1686
1687 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1688 req.n.nlmsg_flags = NLM_F_REQUEST;
1689 if (cmd == RTM_NEWNEIGH)
1690 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
1691 req.n.nlmsg_type = cmd;
1692 req.ndm.ndm_family = PF_BRIDGE;
1693 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
1694 req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
1695
1696
1697 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
1698 req.ndm.ndm_ifindex = ifp->ifindex;
1699 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip->s_addr, 4);
1700
1701 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1702 0);
1703 }
1704
1705 /*
1706 * Add remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1707 * adding
1708 * a "flood" MAC FDB entry.
1709 */
1710 int kernel_add_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1711 {
1712 if (IS_ZEBRA_DEBUG_VXLAN)
1713 zlog_debug("Install %s into flood list for VNI %u intf %s(%u)",
1714 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1715
1716 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_NEWNEIGH);
1717 }
1718
1719 /*
1720 * Remove remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1721 * deleting the "flood" MAC FDB entry.
1722 */
1723 int kernel_del_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1724 {
1725 if (IS_ZEBRA_DEBUG_VXLAN)
1726 zlog_debug(
1727 "Uninstall %s from flood list for VNI %u intf %s(%u)",
1728 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1729
1730 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_DELNEIGH);
1731 }
1732
/*
 * Fallback definition, used only when the system headers do not provide
 * NDA_RTA: given a pointer to a struct ndmsg, yield a pointer to the first
 * rtattr, located right after the (aligned) ndmsg header.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
1737
1738 static int netlink_macfdb_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
1739 int len)
1740 {
1741 struct ndmsg *ndm;
1742 struct interface *ifp;
1743 struct zebra_if *zif;
1744 struct zebra_vrf *zvrf;
1745 struct rtattr *tb[NDA_MAX + 1];
1746 struct interface *br_if;
1747 struct ethaddr mac;
1748 vlanid_t vid = 0;
1749 struct prefix vtep_ip;
1750 int vid_present = 0, dst_present = 0;
1751 char buf[ETHER_ADDR_STRLEN];
1752 char vid_buf[20];
1753 char dst_buf[30];
1754 u_char sticky = 0;
1755
1756 ndm = NLMSG_DATA(h);
1757
1758 /* The interface should exist. */
1759 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
1760 ndm->ndm_ifindex);
1761 if (!ifp)
1762 return 0;
1763
1764 /* Locate VRF corresponding to interface. We only process MAC
1765 * notifications
1766 * if EVPN is enabled on this VRF.
1767 */
1768 zvrf = vrf_info_lookup(ifp->vrf_id);
1769 if (!zvrf || !EVPN_ENABLED(zvrf))
1770 return 0;
1771 if (!ifp->info)
1772 return 0;
1773
1774 /* The interface should be something we're interested in. */
1775 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
1776 return 0;
1777
1778 /* Drop "permanent" entries. */
1779 if (ndm->ndm_state & NUD_PERMANENT)
1780 return 0;
1781
1782 zif = (struct zebra_if *)ifp->info;
1783 if ((br_if = zif->brslave_info.br_if) == NULL) {
1784 zlog_warn("%s family %s IF %s(%u) brIF %u - no bridge master",
1785 nl_msg_type_to_str(h->nlmsg_type),
1786 nl_family_to_str(ndm->ndm_family), ifp->name,
1787 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1788 return 0;
1789 }
1790
1791 /* Parse attributes and extract fields of interest. */
1792 memset(tb, 0, sizeof tb);
1793 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
1794
1795 if (!tb[NDA_LLADDR]) {
1796 zlog_warn("%s family %s IF %s(%u) brIF %u - no LLADDR",
1797 nl_msg_type_to_str(h->nlmsg_type),
1798 nl_family_to_str(ndm->ndm_family), ifp->name,
1799 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1800 return 0;
1801 }
1802
1803 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
1804 zlog_warn(
1805 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
1806 nl_msg_type_to_str(h->nlmsg_type),
1807 nl_family_to_str(ndm->ndm_family), ifp->name,
1808 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex,
1809 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
1810 return 0;
1811 }
1812
1813 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
1814
1815 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
1816 vid_present = 1;
1817 vid = *(u_int16_t *)RTA_DATA(tb[NDA_VLAN]);
1818 sprintf(vid_buf, " VLAN %u", vid);
1819 }
1820
1821 if (tb[NDA_DST]) {
1822 /* TODO: Only IPv4 supported now. */
1823 dst_present = 1;
1824 vtep_ip.family = AF_INET;
1825 vtep_ip.prefixlen = IPV4_MAX_BITLEN;
1826 memcpy(&(vtep_ip.u.prefix4.s_addr), RTA_DATA(tb[NDA_DST]),
1827 IPV4_MAX_BYTELEN);
1828 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip.u.prefix4));
1829 }
1830
1831 sticky = (ndm->ndm_state & NUD_NOARP) ? 1 : 0;
1832
1833 if (IS_ZEBRA_DEBUG_KERNEL)
1834 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
1835 nl_msg_type_to_str(h->nlmsg_type),
1836 nl_family_to_str(ndm->ndm_family), ifp->name,
1837 ndm->ndm_ifindex, vid_present ? vid_buf : "",
1838 sticky ? "sticky " : "",
1839 prefix_mac2str(&mac, buf, sizeof(buf)),
1840 dst_present ? dst_buf : "");
1841
1842 if (filter_vlan && vid != filter_vlan)
1843 return 0;
1844
1845 /* If add or update, do accordingly if learnt on a "local" interface; if
1846 * the notification is over VxLAN, this has to be related to
1847 * multi-homing,
1848 * so perform an implicit delete of any local entry (if it exists).
1849 */
1850 if (h->nlmsg_type == RTM_NEWNEIGH) {
1851 /* Drop "permanent" entries. */
1852 if (ndm->ndm_state & NUD_PERMANENT)
1853 return 0;
1854
1855 if (IS_ZEBRA_IF_VXLAN(ifp))
1856 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
1857 vid);
1858
1859 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
1860 sticky);
1861 }
1862
1863 /* This is a delete notification.
1864 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
1865 * 2. For a MAC over "local" interface, delete the mac
1866 * Note: We will get notifications from both bridge driver and VxLAN
1867 * driver.
1868 * Ignore the notification from VxLan driver as it is also generated
1869 * when mac moves from remote to local.
1870 */
1871 if (dst_present)
1872 return 0;
1873
1874 if (IS_ZEBRA_IF_VXLAN(ifp))
1875 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
1876 vid);
1877
1878 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
1879 }
1880
1881 static int netlink_macfdb_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
1882 ns_id_t ns_id, int startup)
1883 {
1884 int len;
1885 struct ndmsg *ndm;
1886
1887 if (h->nlmsg_type != RTM_NEWNEIGH)
1888 return 0;
1889
1890 /* Length validity. */
1891 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
1892 if (len < 0)
1893 return -1;
1894
1895 /* We are interested only in AF_BRIDGE notifications. */
1896 ndm = NLMSG_DATA(h);
1897 if (ndm->ndm_family != AF_BRIDGE)
1898 return 0;
1899
1900 return netlink_macfdb_change(snl, h, len);
1901 }
1902
1903 /* Request for MAC FDB information from the kernel */
1904 static int netlink_request_macs(struct zebra_ns *zns, int family, int type,
1905 ifindex_t master_ifindex)
1906 {
1907 struct {
1908 struct nlmsghdr n;
1909 struct ifinfomsg ifm;
1910 char buf[256];
1911 } req;
1912
1913 /* Form the request, specifying filter (rtattr) if needed. */
1914 memset(&req, 0, sizeof(req));
1915 req.n.nlmsg_type = type;
1916 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1917 req.ifm.ifi_family = family;
1918 if (master_ifindex)
1919 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
1920
1921 return netlink_request(&zns->netlink_cmd, &req.n);
1922 }
1923
1924 /*
1925 * MAC forwarding database read using netlink interface. This is invoked
1926 * at startup.
1927 */
1928 int netlink_macfdb_read(struct zebra_ns *zns)
1929 {
1930 int ret;
1931
1932 /* Get bridge FDB table. */
1933 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH, 0);
1934 if (ret < 0)
1935 return ret;
1936 /* We are reading entire table. */
1937 filter_vlan = 0;
1938 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1939 0, 1);
1940
1941 return ret;
1942 }
1943
1944 /*
1945 * MAC forwarding database read using netlink interface. This is for a
1946 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
1947 */
1948 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
1949 struct interface *br_if)
1950 {
1951 struct zebra_if *br_zif;
1952 struct zebra_if *zif;
1953 struct zebra_l2info_vxlan *vxl;
1954 int ret = 0;
1955
1956
1957 /* Save VLAN we're filtering on, if needed. */
1958 br_zif = (struct zebra_if *)br_if->info;
1959 zif = (struct zebra_if *)ifp->info;
1960 vxl = &zif->l2info.vxl;
1961 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
1962 filter_vlan = vxl->access_vlan;
1963
1964 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
1965 */
1966 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH,
1967 br_if->ifindex);
1968 if (ret < 0)
1969 return ret;
1970 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1971 0, 0);
1972
1973 /* Reset VLAN filter. */
1974 filter_vlan = 0;
1975 return ret;
1976 }
1977
1978 static int netlink_macfdb_update(struct interface *ifp, vlanid_t vid,
1979 struct ethaddr *mac, struct in_addr vtep_ip,
1980 int local, int cmd, u_char sticky)
1981 {
1982 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1983 struct {
1984 struct nlmsghdr n;
1985 struct ndmsg ndm;
1986 char buf[256];
1987 } req;
1988 int dst_alen;
1989 struct zebra_if *zif;
1990 struct interface *br_if;
1991 struct zebra_if *br_zif;
1992 char buf[ETHER_ADDR_STRLEN];
1993 int vid_present = 0, dst_present = 0;
1994 char vid_buf[20];
1995 char dst_buf[30];
1996
1997 zif = ifp->info;
1998 if ((br_if = zif->brslave_info.br_if) == NULL) {
1999 zlog_warn("MAC %s on IF %s(%u) - no mapping to bridge",
2000 (cmd == RTM_NEWNEIGH) ? "add" : "del", ifp->name,
2001 ifp->ifindex);
2002 return -1;
2003 }
2004
2005 memset(&req.n, 0, sizeof(req.n));
2006 memset(&req.ndm, 0, sizeof(req.ndm));
2007
2008 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2009 req.n.nlmsg_flags = NLM_F_REQUEST;
2010 if (cmd == RTM_NEWNEIGH)
2011 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2012 req.n.nlmsg_type = cmd;
2013 req.ndm.ndm_family = AF_BRIDGE;
2014 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2015 req.ndm.ndm_state = NUD_REACHABLE;
2016
2017 if (sticky)
2018 req.ndm.ndm_state |= NUD_NOARP;
2019 else
2020 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2021
2022 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2023 req.ndm.ndm_ifindex = ifp->ifindex;
2024 if (!local) {
2025 dst_alen = 4; // TODO: hardcoded
2026 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
2027 dst_present = 1;
2028 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip));
2029 }
2030 br_zif = (struct zebra_if *)br_if->info;
2031 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) {
2032 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2033 vid_present = 1;
2034 sprintf(vid_buf, " VLAN %u", vid);
2035 }
2036 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2037
2038 if (IS_ZEBRA_DEBUG_KERNEL)
2039 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2040 nl_msg_type_to_str(cmd),
2041 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2042 ifp->ifindex, vid_present ? vid_buf : "",
2043 sticky ? "sticky " : "",
2044 prefix_mac2str(mac, buf, sizeof(buf)),
2045 dst_present ? dst_buf : "");
2046
2047 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2048 0);
2049 }
2050
/*
 * Neighbor states treated as "valid for use": netlink_ipneigh_change()
 * handles an RTM_NEWNEIGH in any of these states as an add/update and in
 * any other state as a delete.
 */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE     \
	 | NUD_DELAY)
2054
2055 static int netlink_ipneigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2056 int len)
2057 {
2058 struct ndmsg *ndm;
2059 struct interface *ifp;
2060 struct zebra_if *zif;
2061 struct zebra_vrf *zvrf;
2062 struct rtattr *tb[NDA_MAX + 1];
2063 struct interface *link_if;
2064 struct ethaddr mac;
2065 struct ipaddr ip;
2066 char buf[ETHER_ADDR_STRLEN];
2067 char buf2[INET6_ADDRSTRLEN];
2068 int mac_present = 0;
2069 u_char ext_learned;
2070
2071 ndm = NLMSG_DATA(h);
2072
2073 /* The interface should exist. */
2074 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2075 ndm->ndm_ifindex);
2076 if (!ifp)
2077 return 0;
2078
2079 /* Locate VRF corresponding to interface. We only process neigh
2080 * notifications
2081 * if EVPN is enabled on this VRF.
2082 */
2083 zvrf = vrf_info_lookup(ifp->vrf_id);
2084 if (!zvrf || !EVPN_ENABLED(zvrf))
2085 return 0;
2086 if (!ifp->info)
2087 return 0;
2088
2089 /* Drop "permanent" entries. */
2090 if (ndm->ndm_state & NUD_PERMANENT)
2091 return 0;
2092
2093 zif = (struct zebra_if *)ifp->info;
2094 /* The neighbor is present on an SVI. From this, we locate the
2095 * underlying
2096 * bridge because we're only interested in neighbors on a VxLAN bridge.
2097 * The bridge is located based on the nature of the SVI:
2098 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2099 * interface
2100 * and is linked to the bridge
2101 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2102 * inteface
2103 * itself
2104 */
2105 if (IS_ZEBRA_IF_VLAN(ifp)) {
2106 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2107 zif->link_ifindex);
2108 if (!link_if)
2109 return 0;
2110 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
2111 link_if = ifp;
2112 else
2113 return 0;
2114
2115 /* Parse attributes and extract fields of interest. */
2116 memset(tb, 0, sizeof tb);
2117 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2118
2119 if (!tb[NDA_DST]) {
2120 zlog_warn("%s family %s IF %s(%u) - no DST",
2121 nl_msg_type_to_str(h->nlmsg_type),
2122 nl_family_to_str(ndm->ndm_family), ifp->name,
2123 ndm->ndm_ifindex);
2124 return 0;
2125 }
2126 memset(&mac, 0, sizeof(struct ethaddr));
2127 memset(&ip, 0, sizeof(struct ipaddr));
2128 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
2129 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
2130
2131 if (h->nlmsg_type == RTM_NEWNEIGH) {
2132 if (tb[NDA_LLADDR]) {
2133 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
2134 zlog_warn(
2135 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
2136 nl_msg_type_to_str(h->nlmsg_type),
2137 nl_family_to_str(ndm->ndm_family),
2138 ifp->name, ndm->ndm_ifindex,
2139 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
2140 return 0;
2141 }
2142
2143 mac_present = 1;
2144 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
2145 }
2146
2147 ext_learned = (ndm->ndm_flags & NTF_EXT_LEARNED) ? 1 : 0;
2148
2149 if (IS_ZEBRA_DEBUG_KERNEL)
2150 zlog_debug(
2151 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2152 nl_msg_type_to_str(h->nlmsg_type),
2153 nl_family_to_str(ndm->ndm_family), ifp->name,
2154 ndm->ndm_ifindex,
2155 ipaddr2str(&ip, buf2, sizeof(buf2)),
2156 mac_present
2157 ? prefix_mac2str(&mac, buf, sizeof(buf))
2158 : "",
2159 ndm->ndm_state, ndm->ndm_flags);
2160
2161 /* If the neighbor state is valid for use, process as an add or
2162 * update
2163 * else process as a delete. Note that the delete handling may
2164 * result
2165 * in re-adding the neighbor if it is a valid "remote" neighbor.
2166 */
2167 if (ndm->ndm_state & NUD_VALID)
2168 return zebra_vxlan_local_neigh_add_update(
2169 ifp, link_if, &ip, &mac, ndm->ndm_state,
2170 ext_learned);
2171
2172 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2173 }
2174
2175 if (IS_ZEBRA_DEBUG_KERNEL)
2176 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2177 nl_msg_type_to_str(h->nlmsg_type),
2178 nl_family_to_str(ndm->ndm_family), ifp->name,
2179 ndm->ndm_ifindex,
2180 ipaddr2str(&ip, buf2, sizeof(buf2)));
2181
2182 /* Process the delete - it may result in re-adding the neighbor if it is
2183 * a valid "remote" neighbor.
2184 */
2185 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2186 }
2187
2188 static int netlink_neigh_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
2189 ns_id_t ns_id, int startup)
2190 {
2191 int len;
2192 struct ndmsg *ndm;
2193
2194 if (h->nlmsg_type != RTM_NEWNEIGH)
2195 return 0;
2196
2197 /* Length validity. */
2198 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2199 if (len < 0)
2200 return -1;
2201
2202 /* We are interested only in AF_INET or AF_INET6 notifications. */
2203 ndm = NLMSG_DATA(h);
2204 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
2205 return 0;
2206
2207 return netlink_neigh_change(snl, h, len);
2208 }
2209
2210 /* Request for IP neighbor information from the kernel */
2211 static int netlink_request_neigh(struct zebra_ns *zns, int family, int type,
2212 ifindex_t ifindex)
2213 {
2214 struct {
2215 struct nlmsghdr n;
2216 struct ndmsg ndm;
2217 char buf[256];
2218 } req;
2219
2220 /* Form the request, specifying filter (rtattr) if needed. */
2221 memset(&req, 0, sizeof(req));
2222 req.n.nlmsg_type = type;
2223 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2224 req.ndm.ndm_family = family;
2225 if (ifindex)
2226 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
2227
2228 return netlink_request(&zns->netlink_cmd, &req.n);
2229 }
2230
2231 /*
2232 * IP Neighbor table read using netlink interface. This is invoked
2233 * at startup.
2234 */
2235 int netlink_neigh_read(struct zebra_ns *zns)
2236 {
2237 int ret;
2238
2239 /* Get IP neighbor table. */
2240 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH, 0);
2241 if (ret < 0)
2242 return ret;
2243 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2244 1);
2245
2246 return ret;
2247 }
2248
2249 /*
2250 * IP Neighbor table read using netlink interface. This is for a specific
2251 * VLAN device.
2252 */
2253 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2254 {
2255 int ret = 0;
2256
2257 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH,
2258 vlan_if->ifindex);
2259 if (ret < 0)
2260 return ret;
2261 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2262 0);
2263
2264 return ret;
2265 }
2266
2267 int netlink_neigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2268 ns_id_t ns_id)
2269 {
2270 int len;
2271 struct ndmsg *ndm;
2272
2273 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
2274 return 0;
2275
2276 /* Length validity. */
2277 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2278 if (len < 0)
2279 return -1;
2280
2281 /* Is this a notification for the MAC FDB or IP neighbor table? */
2282 ndm = NLMSG_DATA(h);
2283 if (ndm->ndm_family == AF_BRIDGE)
2284 return netlink_macfdb_change(snl, h, len);
2285
2286 if (ndm->ndm_type != RTN_UNICAST)
2287 return 0;
2288
2289 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2290 return netlink_ipneigh_change(snl, h, len);
2291
2292 return 0;
2293 }
2294
2295 static int netlink_neigh_update2(struct interface *ifp, struct ipaddr *ip,
2296 struct ethaddr *mac, u_int32_t flags, int cmd)
2297 {
2298 struct {
2299 struct nlmsghdr n;
2300 struct ndmsg ndm;
2301 char buf[256];
2302 } req;
2303 int ipa_len;
2304
2305 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2306 char buf[INET6_ADDRSTRLEN];
2307 char buf2[ETHER_ADDR_STRLEN];
2308
2309 memset(&req.n, 0, sizeof(req.n));
2310 memset(&req.ndm, 0, sizeof(req.ndm));
2311
2312 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2313 req.n.nlmsg_flags = NLM_F_REQUEST;
2314 if (cmd == RTM_NEWNEIGH)
2315 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2316 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
2317 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
2318 req.ndm.ndm_state = flags;
2319 req.ndm.ndm_ifindex = ifp->ifindex;
2320 req.ndm.ndm_type = RTN_UNICAST;
2321 req.ndm.ndm_flags = NTF_EXT_LEARNED;
2322
2323
2324 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2325 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2326 if (mac)
2327 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2328
2329 if (IS_ZEBRA_DEBUG_KERNEL)
2330 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s",
2331 nl_msg_type_to_str(cmd),
2332 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2333 ifp->ifindex, ipaddr2str(ip, buf, sizeof(buf)),
2334 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
2335 : "null");
2336
2337 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2338 0);
2339 }
2340
2341 int kernel_add_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2342 struct in_addr vtep_ip, u_char sticky)
2343 {
2344 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, 0, RTM_NEWNEIGH,
2345 sticky);
2346 }
2347
2348 int kernel_del_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2349 struct in_addr vtep_ip, int local)
2350 {
2351 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, local,
2352 RTM_DELNEIGH, 0);
2353 }
2354
/*
 * Add a neighbor entry for 'ip' with link-layer address 'mac' on 'ifp';
 * the entry is sent as RTM_NEWNEIGH in state NUD_REACHABLE.
 */
int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
		     struct ethaddr *mac)
{
	const u_int32_t state = NUD_REACHABLE;

	return netlink_neigh_update2(ifp, ip, mac, state, RTM_NEWNEIGH);
}
2360
/*
 * Delete the neighbor entry for 'ip' on 'ifp'; sent as RTM_DELNEIGH with no
 * link-layer address and a zero state.
 */
int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip)
{
	struct ethaddr *no_mac = NULL;

	return netlink_neigh_update2(ifp, ip, no_mac, 0, RTM_DELNEIGH);
}
2365
2366 /*
2367 * MPLS label forwarding table change via netlink interface.
2368 */
2369 int netlink_mpls_multipath(int cmd, zebra_lsp_t *lsp)
2370 {
2371 mpls_lse_t lse;
2372 zebra_nhlfe_t *nhlfe;
2373 struct nexthop *nexthop = NULL;
2374 unsigned int nexthop_num;
2375 const char *routedesc;
2376 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2377 int route_type;
2378
2379 struct {
2380 struct nlmsghdr n;
2381 struct rtmsg r;
2382 char buf[NL_PKT_BUF_SIZE];
2383 } req;
2384
2385 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
2386
2387
2388 /*
2389 * Count # nexthops so we can decide whether to use singlepath
2390 * or multipath case.
2391 */
2392 nexthop_num = 0;
2393 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2394 nexthop = nhlfe->nexthop;
2395 if (!nexthop)
2396 continue;
2397 if (cmd == RTM_NEWROUTE) {
2398 /* Count all selected NHLFEs */
2399 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2400 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2401 nexthop_num++;
2402 } else /* DEL */
2403 {
2404 /* Count all installed NHLFEs */
2405 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
2406 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
2407 nexthop_num++;
2408 }
2409 }
2410
2411 if (nexthop_num == 0 || !lsp->best_nhlfe) // unexpected
2412 return 0;
2413
2414 route_type = re_type_from_lsp_type(lsp->best_nhlfe->type);
2415
2416 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2417 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2418 req.n.nlmsg_type = cmd;
2419 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2420
2421 req.r.rtm_family = AF_MPLS;
2422 req.r.rtm_table = RT_TABLE_MAIN;
2423 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
2424 req.r.rtm_protocol = zebra2proto(route_type);
2425 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
2426 req.r.rtm_type = RTN_UNICAST;
2427
2428 if (cmd == RTM_NEWROUTE)
2429 /* We do a replace to handle update. */
2430 req.n.nlmsg_flags |= NLM_F_REPLACE;
2431
2432 /* Fill destination */
2433 lse = mpls_lse_encode(lsp->ile.in_label, 0, 0, 1);
2434 addattr_l(&req.n, sizeof req, RTA_DST, &lse, sizeof(mpls_lse_t));
2435
2436 /* Fill nexthops (paths) based on single-path or multipath. The paths
2437 * chosen depend on the operation.
2438 */
2439 if (nexthop_num == 1 || multipath_num == 1) {
2440 routedesc = "single-path";
2441 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2442
2443 nexthop_num = 0;
2444 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2445 nexthop = nhlfe->nexthop;
2446 if (!nexthop)
2447 continue;
2448
2449 if ((cmd == RTM_NEWROUTE
2450 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2451 && CHECK_FLAG(nexthop->flags,
2452 NEXTHOP_FLAG_ACTIVE)))
2453 || (cmd == RTM_DELROUTE
2454 && (CHECK_FLAG(nhlfe->flags,
2455 NHLFE_FLAG_INSTALLED)
2456 && CHECK_FLAG(nexthop->flags,
2457 NEXTHOP_FLAG_FIB)))) {
2458 /* Add the gateway */
2459 _netlink_mpls_build_singlepath(routedesc, nhlfe,
2460 &req.n, &req.r,
2461 sizeof req, cmd);
2462 if (cmd == RTM_NEWROUTE) {
2463 SET_FLAG(nhlfe->flags,
2464 NHLFE_FLAG_INSTALLED);
2465 SET_FLAG(nexthop->flags,
2466 NEXTHOP_FLAG_FIB);
2467 } else {
2468 UNSET_FLAG(nhlfe->flags,
2469 NHLFE_FLAG_INSTALLED);
2470 UNSET_FLAG(nexthop->flags,
2471 NEXTHOP_FLAG_FIB);
2472 }
2473 nexthop_num++;
2474 break;
2475 }
2476 }
2477 } else /* Multipath case */
2478 {
2479 char buf[NL_PKT_BUF_SIZE];
2480 struct rtattr *rta = (void *)buf;
2481 struct rtnexthop *rtnh;
2482 union g_addr *src1 = NULL;
2483
2484 rta->rta_type = RTA_MULTIPATH;
2485 rta->rta_len = RTA_LENGTH(0);
2486 rtnh = RTA_DATA(rta);
2487
2488 routedesc = "multipath";
2489 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2490
2491 nexthop_num = 0;
2492 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2493 nexthop = nhlfe->nexthop;
2494 if (!nexthop)
2495 continue;
2496
2497 if (nexthop_num >= multipath_num)
2498 break;
2499
2500 if ((cmd == RTM_NEWROUTE
2501 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2502 && CHECK_FLAG(nexthop->flags,
2503 NEXTHOP_FLAG_ACTIVE)))
2504 || (cmd == RTM_DELROUTE
2505 && (CHECK_FLAG(nhlfe->flags,
2506 NHLFE_FLAG_INSTALLED)
2507 && CHECK_FLAG(nexthop->flags,
2508 NEXTHOP_FLAG_FIB)))) {
2509 nexthop_num++;
2510
2511 /* Build the multipath */
2512 _netlink_mpls_build_multipath(routedesc, nhlfe,
2513 rta, rtnh, &req.r,
2514 &src1);
2515 rtnh = RTNH_NEXT(rtnh);
2516
2517 if (cmd == RTM_NEWROUTE) {
2518 SET_FLAG(nhlfe->flags,
2519 NHLFE_FLAG_INSTALLED);
2520 SET_FLAG(nexthop->flags,
2521 NEXTHOP_FLAG_FIB);
2522 } else {
2523 UNSET_FLAG(nhlfe->flags,
2524 NHLFE_FLAG_INSTALLED);
2525 UNSET_FLAG(nexthop->flags,
2526 NEXTHOP_FLAG_FIB);
2527 }
2528 }
2529 }
2530
2531 /* Add the multipath */
2532 if (rta->rta_len > RTA_LENGTH(0))
2533 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
2534 RTA_DATA(rta), RTA_PAYLOAD(rta));
2535 }
2536
2537 /* Talk to netlink socket. */
2538 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2539 0);
2540 }
2541
2542 /*
2543 * Handle failure in LSP install, clear flags for NHLFE.
2544 */
2545 void clear_nhlfe_installed(zebra_lsp_t *lsp)
2546 {
2547 zebra_nhlfe_t *nhlfe;
2548 struct nexthop *nexthop;
2549
2550 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2551 nexthop = nhlfe->nexthop;
2552 if (!nexthop)
2553 continue;
2554
2555 UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED);
2556 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
2557 }
2558 }
2559
2560 #endif /* HAVE_NETLINK */