]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rt_netlink.c
Merge pull request #1297 from dslicenc/cm18154-import-table
[mirror_frr.git] / zebra / rt_netlink.c
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #ifdef HAVE_NETLINK
24
25 #include <net/if_arp.h>
26
/* Fallback for C libraries (originally GNU libc version 2) whose socket
 * headers do not provide MSG_TRUNC. */
#if !defined(MSG_TRUNC)
#define MSG_TRUNC 0x20
#endif /* !defined(MSG_TRUNC) */
31
32 #include "linklist.h"
33 #include "if.h"
34 #include "log.h"
35 #include "prefix.h"
36 #include "connected.h"
37 #include "table.h"
38 #include "memory.h"
39 #include "zebra_memory.h"
40 #include "rib.h"
41 #include "thread.h"
42 #include "privs.h"
43 #include "nexthop.h"
44 #include "vrf.h"
45 #include "vty.h"
46 #include "mpls.h"
47 #include "vxlan.h"
48
49 #include "zebra/zserv.h"
50 #include "zebra/zebra_ns.h"
51 #include "zebra/zebra_vrf.h"
52 #include "zebra/rt.h"
53 #include "zebra/redistribute.h"
54 #include "zebra/interface.h"
55 #include "zebra/debug.h"
56 #include "zebra/rtadv.h"
57 #include "zebra/zebra_ptm.h"
58 #include "zebra/zebra_mpls.h"
59 #include "zebra/kernel_netlink.h"
60 #include "zebra/rt_netlink.h"
61 #include "zebra/zebra_mroute.h"
62 #include "zebra/zebra_vxlan.h"
63
64
/*
 * TODO - Temporary definitions, need to refine.
 *
 * Each constant below is only defined when the system headers pulled in
 * above did not already provide it, so a newer header always wins.
 */

/* Address family and route attributes. */
#if !defined(AF_MPLS)
#define AF_MPLS 28
#endif

#if !defined(RTA_VIA)
#define RTA_VIA 18
#endif

#if !defined(RTA_NEWDST)
#define RTA_NEWDST 19
#endif

#if !defined(RTA_ENCAP_TYPE)
#define RTA_ENCAP_TYPE 21
#endif

#if !defined(RTA_ENCAP)
#define RTA_ENCAP 22
#endif

#if !defined(RTA_EXPIRES)
#define RTA_EXPIRES 23
#endif

/* Lightweight-tunnel (MPLS encapsulation) constants. */
#if !defined(LWTUNNEL_ENCAP_MPLS)
#define LWTUNNEL_ENCAP_MPLS 1
#endif

#if !defined(MPLS_IPTUNNEL_DST)
#define MPLS_IPTUNNEL_DST 1
#endif

/* Neighbor attribute types and neighbor flags. */
#if !defined(NDA_MASTER)
#define NDA_MASTER 9
#endif

#if !defined(NTF_MASTER)
#define NTF_MASTER 0x04
#endif

#if !defined(NTF_SELF)
#define NTF_SELF 0x02
#endif

#if !defined(NTF_EXT_LEARNED)
#define NTF_EXT_LEARNED 0x10
#endif

#if !defined(NDA_IFINDEX)
#define NDA_IFINDEX 8
#endif

#if !defined(NDA_VLAN)
#define NDA_VLAN 5
#endif
/* End of temporary definitions */
122
123 static vlanid_t filter_vlan = 0;
124
125 struct gw_family_t {
126 u_int16_t filler;
127 u_int16_t family;
128 union g_addr gate;
129 };
130
/* Textual form of the IPv4 link-local address used as gateway for 5549
 * routes (IPv4 route with an IPv6 nexthop); also printed in debug logs. */
char ipv4_ll_buf[16] = "169.254.0.1";

/* Binary form of ipv4_ll_buf, filled in by rt_netlink_init(). */
struct in_addr ipv4_ll;

/*
 * The ipv4_ll data structure is used for every 5549 route handed to
 * the kernel.  Convert the textual address once here instead of doing
 * the conversion on every install/remove of a 5549 type route.
 */
void rt_netlink_init(void)
{
	(void)inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
}
144
145 static inline int is_selfroute(int proto)
146 {
147 if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
148 || (proto == RTPROT_STATIC) || (proto == RTPROT_ZEBRA)
149 || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
150 || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
151 || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
152 || (proto == RTPROT_RIP)) {
153 return 1;
154 }
155
156 return 0;
157 }
158
159 static inline int zebra2proto(int proto)
160 {
161 switch (proto) {
162 case ZEBRA_ROUTE_BABEL:
163 proto = RTPROT_BABEL;
164 break;
165 case ZEBRA_ROUTE_BGP:
166 proto = RTPROT_BGP;
167 break;
168 case ZEBRA_ROUTE_OSPF:
169 case ZEBRA_ROUTE_OSPF6:
170 proto = RTPROT_OSPF;
171 break;
172 case ZEBRA_ROUTE_STATIC:
173 proto = RTPROT_STATIC;
174 break;
175 case ZEBRA_ROUTE_ISIS:
176 proto = RTPROT_ISIS;
177 break;
178 case ZEBRA_ROUTE_RIP:
179 proto = RTPROT_RIP;
180 break;
181 case ZEBRA_ROUTE_RIPNG:
182 proto = RTPROT_RIPNG;
183 break;
184 case ZEBRA_ROUTE_NHRP:
185 proto = RTPROT_NHRP;
186 break;
187 case ZEBRA_ROUTE_EIGRP:
188 proto = RTPROT_EIGRP;
189 break;
190 case ZEBRA_ROUTE_LDP:
191 proto = RTPROT_LDP;
192 break;
193 default:
194 proto = RTPROT_ZEBRA;
195 break;
196 }
197
198 return proto;
199 }
200
201 static inline int proto2zebra(int proto, int family)
202 {
203 switch (proto) {
204 case RTPROT_BABEL:
205 proto = ZEBRA_ROUTE_BABEL;
206 break;
207 case RTPROT_BGP:
208 proto = ZEBRA_ROUTE_BGP;
209 break;
210 case RTPROT_OSPF:
211 proto = (family == AFI_IP) ?
212 ZEBRA_ROUTE_OSPF : ZEBRA_ROUTE_OSPF6;
213 break;
214 case RTPROT_ISIS:
215 proto = ZEBRA_ROUTE_ISIS;
216 break;
217 case RTPROT_RIP:
218 proto = ZEBRA_ROUTE_RIP;
219 break;
220 case RTPROT_RIPNG:
221 proto = ZEBRA_ROUTE_RIPNG;
222 break;
223 case RTPROT_NHRP:
224 proto = ZEBRA_ROUTE_NHRP;
225 break;
226 case RTPROT_EIGRP:
227 proto = ZEBRA_ROUTE_EIGRP;
228 break;
229 case RTPROT_LDP:
230 proto = ZEBRA_ROUTE_LDP;
231 break;
232 case RTPROT_STATIC:
233 proto = ZEBRA_ROUTE_STATIC;
234 break;
235 default:
236 proto = ZEBRA_ROUTE_KERNEL;
237 break;
238 }
239 return proto;
240 }
241
242 /*
243 Pending: create an efficient table_id (in a tree/hash) based lookup)
244 */
245 static vrf_id_t vrf_lookup_by_table(u_int32_t table_id)
246 {
247 struct vrf *vrf;
248 struct zebra_vrf *zvrf;
249
250 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
251 if ((zvrf = vrf->info) == NULL || (zvrf->table_id != table_id))
252 continue;
253
254 return zvrf_id(zvrf);
255 }
256
257 return VRF_DEFAULT;
258 }
259
260 /* Looking up routing table by netlink interface. */
261 static int netlink_route_change_read_unicast(struct sockaddr_nl *snl,
262 struct nlmsghdr *h, ns_id_t ns_id,
263 int startup)
264 {
265 int len;
266 struct rtmsg *rtm;
267 struct rtattr *tb[RTA_MAX + 1];
268 u_char flags = 0;
269 struct prefix p;
270 struct prefix_ipv6 src_p = {};
271 vrf_id_t vrf_id = VRF_DEFAULT;
272
273 char anyaddr[16] = {0};
274
275 int proto = ZEBRA_ROUTE_KERNEL;
276 int index = 0;
277 int table;
278 int metric = 0;
279 u_int32_t mtu = 0;
280 uint8_t distance = 0;
281
282 void *dest = NULL;
283 void *gate = NULL;
284 void *prefsrc = NULL; /* IPv4 preferred source host address */
285 void *src = NULL; /* IPv6 srcdest source prefix */
286 enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
287
288 rtm = NLMSG_DATA(h);
289
290 if (startup && h->nlmsg_type != RTM_NEWROUTE)
291 return 0;
292 switch (rtm->rtm_type) {
293 case RTN_UNICAST:
294 break;
295 case RTN_BLACKHOLE:
296 bh_type = BLACKHOLE_NULL;
297 break;
298 case RTN_UNREACHABLE:
299 bh_type = BLACKHOLE_REJECT;
300 break;
301 case RTN_PROHIBIT:
302 bh_type = BLACKHOLE_ADMINPROHIB;
303 break;
304 default:
305 return 0;
306 }
307
308 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
309 if (len < 0)
310 return -1;
311
312 memset(tb, 0, sizeof tb);
313 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
314
315 if (rtm->rtm_flags & RTM_F_CLONED)
316 return 0;
317 if (rtm->rtm_protocol == RTPROT_REDIRECT)
318 return 0;
319 if (rtm->rtm_protocol == RTPROT_KERNEL)
320 return 0;
321
322 if (!startup && is_selfroute(rtm->rtm_protocol)
323 && h->nlmsg_type == RTM_NEWROUTE)
324 return 0;
325
326 /* We don't care about change notifications for the MPLS table. */
327 /* TODO: Revisit this. */
328 if (rtm->rtm_family == AF_MPLS)
329 return 0;
330
331 /* Table corresponding to route. */
332 if (tb[RTA_TABLE])
333 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
334 else
335 table = rtm->rtm_table;
336
337 /* Map to VRF */
338 vrf_id = vrf_lookup_by_table(table);
339 if (vrf_id == VRF_DEFAULT) {
340 if (!is_zebra_valid_kernel_table(table)
341 && !is_zebra_main_routing_table(table))
342 return 0;
343 }
344
345 /* Route which inserted by Zebra. */
346 if (is_selfroute(rtm->rtm_protocol)) {
347 flags |= ZEBRA_FLAG_SELFROUTE;
348 proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
349 }
350 if (tb[RTA_OIF])
351 index = *(int *)RTA_DATA(tb[RTA_OIF]);
352
353 if (tb[RTA_DST])
354 dest = RTA_DATA(tb[RTA_DST]);
355 else
356 dest = anyaddr;
357
358 if (tb[RTA_SRC])
359 src = RTA_DATA(tb[RTA_SRC]);
360 else
361 src = anyaddr;
362
363 if (tb[RTA_PREFSRC])
364 prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
365
366 if (tb[RTA_GATEWAY])
367 gate = RTA_DATA(tb[RTA_GATEWAY]);
368
369 if (tb[RTA_PRIORITY])
370 metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
371
372 if (tb[RTA_METRICS]) {
373 struct rtattr *mxrta[RTAX_MAX + 1];
374
375 memset(mxrta, 0, sizeof mxrta);
376 netlink_parse_rtattr(mxrta, RTAX_MAX,
377 RTA_DATA(tb[RTA_METRICS]),
378 RTA_PAYLOAD(tb[RTA_METRICS]));
379
380 if (mxrta[RTAX_MTU])
381 mtu = *(u_int32_t *)RTA_DATA(mxrta[RTAX_MTU]);
382 }
383
384 if (rtm->rtm_family == AF_INET) {
385 p.family = AF_INET;
386 memcpy(&p.u.prefix4, dest, 4);
387 p.prefixlen = rtm->rtm_dst_len;
388
389 src_p.prefixlen =
390 0; // Forces debug below to not display anything
391 } else if (rtm->rtm_family == AF_INET6) {
392 p.family = AF_INET6;
393 memcpy(&p.u.prefix6, dest, 16);
394 p.prefixlen = rtm->rtm_dst_len;
395
396 src_p.family = AF_INET6;
397 memcpy(&src_p.prefix, src, 16);
398 src_p.prefixlen = rtm->rtm_src_len;
399 }
400
401 if (rtm->rtm_src_len != 0) {
402 char buf[PREFIX_STRLEN];
403 zlog_warn(
404 "unsupported IPv[4|6] sourcedest route (dest %s vrf %u)",
405 prefix2str(&p, buf, sizeof(buf)), vrf_id);
406 return 0;
407 }
408
409 /*
410 * For ZEBRA_ROUTE_KERNEL types:
411 *
412 * The metric/priority of the route received from the kernel
413 * is a 32 bit number. We are going to interpret the high
414 * order byte as the Admin Distance and the low order 3 bytes
415 * as the metric.
416 *
417 * This will allow us to do two things:
418 * 1) Allow the creation of kernel routes that can be
419 * overridden by zebra.
420 * 2) Allow the old behavior for 'most' kernel route types
421 * if a user enters 'ip route ...' v4 routes get a metric
422 * of 0 and v6 routes get a metric of 1024. Both of these
423 * values will end up with a admin distance of 0, which
424 * will cause them to win for the purposes of zebra.
425 */
426 if (proto == ZEBRA_ROUTE_KERNEL) {
427 distance = (metric >> 24) & 0xFF;
428 metric = (metric & 0x00FFFFFF);
429 }
430
431 if (IS_ZEBRA_DEBUG_KERNEL) {
432 char buf[PREFIX_STRLEN];
433 char buf2[PREFIX_STRLEN];
434 zlog_debug(
435 "%s %s%s%s vrf %u metric: %d Admin Distance: %d", nl_msg_type_to_str(h->nlmsg_type),
436 prefix2str(&p, buf, sizeof(buf)),
437 src_p.prefixlen ? " from " : "",
438 src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
439 : "",
440 vrf_id, metric, distance);
441 }
442
443 afi_t afi = AFI_IP;
444 if (rtm->rtm_family == AF_INET6)
445 afi = AFI_IP6;
446
447 if (h->nlmsg_type == RTM_NEWROUTE) {
448 if (!tb[RTA_MULTIPATH]) {
449 struct nexthop nh;
450 size_t sz = (afi == AFI_IP) ? 4 : 16;
451
452 memset(&nh, 0, sizeof(nh));
453
454 if (bh_type == BLACKHOLE_UNSPEC) {
455 if (index && !gate)
456 nh.type = NEXTHOP_TYPE_IFINDEX;
457 else if (index && gate)
458 nh.type = (afi == AFI_IP)
459 ? NEXTHOP_TYPE_IPV4_IFINDEX
460 : NEXTHOP_TYPE_IPV6_IFINDEX;
461 else if (!index && gate)
462 nh.type = (afi == AFI_IP)
463 ? NEXTHOP_TYPE_IPV4
464 : NEXTHOP_TYPE_IPV6;
465 else {
466 nh.type = NEXTHOP_TYPE_BLACKHOLE;
467 nh.bh_type = bh_type;
468 }
469 } else {
470 nh.type = NEXTHOP_TYPE_BLACKHOLE;
471 nh.bh_type = bh_type;
472 }
473 nh.ifindex = index;
474 if (prefsrc)
475 memcpy(&nh.src, prefsrc, sz);
476 if (gate)
477 memcpy(&nh.gate, gate, sz);
478
479 rib_add(afi, SAFI_UNICAST, vrf_id, proto,
480 0, flags, &p, NULL, &nh, table, metric, mtu, distance);
481 } else {
482 /* This is a multipath route */
483
484 struct route_entry *re;
485 struct rtnexthop *rtnh =
486 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
487
488 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
489
490 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
491 re->type = proto;
492 re->distance = distance;
493 re->flags = flags;
494 re->metric = metric;
495 re->mtu = mtu;
496 re->vrf_id = vrf_id;
497 re->table = table;
498 re->nexthop_num = 0;
499 re->uptime = time(NULL);
500
501 for (;;) {
502 if (len < (int)sizeof(*rtnh)
503 || rtnh->rtnh_len > len)
504 break;
505
506 index = rtnh->rtnh_ifindex;
507 gate = 0;
508 if (rtnh->rtnh_len > sizeof(*rtnh)) {
509 memset(tb, 0, sizeof(tb));
510 netlink_parse_rtattr(
511 tb, RTA_MAX, RTNH_DATA(rtnh),
512 rtnh->rtnh_len - sizeof(*rtnh));
513 if (tb[RTA_GATEWAY])
514 gate = RTA_DATA(
515 tb[RTA_GATEWAY]);
516 }
517
518 if (gate) {
519 if (rtm->rtm_family == AF_INET) {
520 if (index)
521 route_entry_nexthop_ipv4_ifindex_add(
522 re, gate,
523 prefsrc, index);
524 else
525 route_entry_nexthop_ipv4_add(
526 re, gate,
527 prefsrc);
528 } else if (rtm->rtm_family
529 == AF_INET6) {
530 if (index)
531 route_entry_nexthop_ipv6_ifindex_add(
532 re, gate,
533 index);
534 else
535 route_entry_nexthop_ipv6_add(
536 re, gate);
537 }
538 } else
539 route_entry_nexthop_ifindex_add(re,
540 index);
541
542 len -= NLMSG_ALIGN(rtnh->rtnh_len);
543 rtnh = RTNH_NEXT(rtnh);
544 }
545
546 zserv_nexthop_num_warn(__func__,
547 (const struct prefix *)&p,
548 re->nexthop_num);
549 if (re->nexthop_num == 0)
550 XFREE(MTYPE_RE, re);
551 else
552 rib_add_multipath(afi, SAFI_UNICAST, &p,
553 NULL, re);
554 }
555 } else {
556 if (!tb[RTA_MULTIPATH]) {
557 struct nexthop nh;
558 size_t sz = (afi == AFI_IP) ? 4 : 16;
559
560 memset(&nh, 0, sizeof(nh));
561 if (bh_type == BLACKHOLE_UNSPEC) {
562 if (index && !gate)
563 nh.type = NEXTHOP_TYPE_IFINDEX;
564 else if (index && gate)
565 nh.type =
566 (afi == AFI_IP)
567 ? NEXTHOP_TYPE_IPV4_IFINDEX
568 : NEXTHOP_TYPE_IPV6_IFINDEX;
569 else if (!index && gate)
570 nh.type = (afi == AFI_IP)
571 ? NEXTHOP_TYPE_IPV4
572 : NEXTHOP_TYPE_IPV6;
573 else {
574 nh.type = NEXTHOP_TYPE_BLACKHOLE;
575 nh.bh_type = BLACKHOLE_UNSPEC;
576 }
577 } else {
578 nh.type = NEXTHOP_TYPE_BLACKHOLE;
579 nh.bh_type = bh_type;
580 }
581 nh.ifindex = index;
582 if (gate)
583 memcpy(&nh.gate, gate, sz);
584 rib_delete(afi, SAFI_UNICAST, vrf_id,
585 proto, 0, flags, &p, NULL, &nh,
586 table, metric, true);
587 } else {
588 /* XXX: need to compare the entire list of nexthops
589 * here for NLM_F_APPEND stupidity */
590 rib_delete(afi, SAFI_UNICAST, vrf_id,
591 proto, 0, flags, &p, NULL, NULL,
592 table, metric, true);
593 }
594 }
595
596 return 0;
597 }
598
599 static struct mcast_route_data *mroute = NULL;
600
601 static int netlink_route_change_read_multicast(struct sockaddr_nl *snl,
602 struct nlmsghdr *h,
603 ns_id_t ns_id, int startup)
604 {
605 int len;
606 struct rtmsg *rtm;
607 struct rtattr *tb[RTA_MAX + 1];
608 struct mcast_route_data *m;
609 struct mcast_route_data mr;
610 int iif = 0;
611 int count;
612 int oif[256];
613 int oif_count = 0;
614 char sbuf[40];
615 char gbuf[40];
616 char oif_list[256] = "\0";
617 vrf_id_t vrf = ns_id;
618 int table;
619
620 if (mroute)
621 m = mroute;
622 else {
623 memset(&mr, 0, sizeof(mr));
624 m = &mr;
625 }
626
627 rtm = NLMSG_DATA(h);
628
629 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
630
631 memset(tb, 0, sizeof tb);
632 netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
633
634 if (tb[RTA_TABLE])
635 table = *(int *)RTA_DATA(tb[RTA_TABLE]);
636 else
637 table = rtm->rtm_table;
638
639 vrf = vrf_lookup_by_table(table);
640
641 if (tb[RTA_IIF])
642 iif = *(int *)RTA_DATA(tb[RTA_IIF]);
643
644 if (tb[RTA_SRC])
645 m->sg.src = *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
646
647 if (tb[RTA_DST])
648 m->sg.grp = *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
649
650 if ((RTA_EXPIRES <= RTA_MAX) && tb[RTA_EXPIRES])
651 m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
652
653 if (tb[RTA_MULTIPATH]) {
654 struct rtnexthop *rtnh =
655 (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
656
657 len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
658 for (;;) {
659 if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
660 break;
661
662 oif[oif_count] = rtnh->rtnh_ifindex;
663 oif_count++;
664
665 len -= NLMSG_ALIGN(rtnh->rtnh_len);
666 rtnh = RTNH_NEXT(rtnh);
667 }
668 }
669
670 if (IS_ZEBRA_DEBUG_KERNEL) {
671 struct interface *ifp;
672 strlcpy(sbuf, inet_ntoa(m->sg.src), sizeof(sbuf));
673 strlcpy(gbuf, inet_ntoa(m->sg.grp), sizeof(gbuf));
674 for (count = 0; count < oif_count; count++) {
675 ifp = if_lookup_by_index(oif[count], vrf);
676 char temp[256];
677
678 sprintf(temp, "%s ", ifp->name);
679 strcat(oif_list, temp);
680 }
681 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vrf);
682 ifp = if_lookup_by_index(iif, vrf);
683 zlog_debug(
684 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s OIF: %s jiffies: %lld",
685 zvrf->vrf->name, vrf, nl_msg_type_to_str(h->nlmsg_type),
686 sbuf, gbuf, ifp->name, oif_list, m->lastused);
687 }
688 return 0;
689 }
690
691 int netlink_route_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
692 ns_id_t ns_id, int startup)
693 {
694 int len;
695 vrf_id_t vrf_id = ns_id;
696 struct rtmsg *rtm;
697
698 rtm = NLMSG_DATA(h);
699
700 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
701 /* If this is not route add/delete message print warning. */
702 zlog_warn("Kernel message: %d vrf %u\n", h->nlmsg_type, vrf_id);
703 return 0;
704 }
705
706 /* Connected route. */
707 if (IS_ZEBRA_DEBUG_KERNEL)
708 zlog_debug("%s %s %s proto %s vrf %u",
709 nl_msg_type_to_str(h->nlmsg_type),
710 nl_family_to_str(rtm->rtm_family),
711 nl_rttype_to_str(rtm->rtm_type),
712 nl_rtproto_to_str(rtm->rtm_protocol), vrf_id);
713
714 /* We don't care about change notifications for the MPLS table. */
715 /* TODO: Revisit this. */
716 if (rtm->rtm_family == AF_MPLS)
717 return 0;
718
719 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
720 if (len < 0)
721 return -1;
722
723 if (rtm->rtm_type == RTN_MULTICAST)
724 netlink_route_change_read_multicast(snl, h, ns_id, startup);
725 else
726 netlink_route_change_read_unicast(snl, h, ns_id, startup);
727 return 0;
728 }
729
730 /* Request for specific route information from the kernel */
731 static int netlink_request_route(struct zebra_ns *zns, int family, int type)
732 {
733 struct {
734 struct nlmsghdr n;
735 struct rtmsg rtm;
736 } req;
737
738 /* Form the request, specifying filter (rtattr) if needed. */
739 memset(&req, 0, sizeof(req));
740 req.n.nlmsg_type = type;
741 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
742 req.rtm.rtm_family = family;
743
744 return netlink_request(&zns->netlink_cmd, &req.n);
745 }
746
747 /* Routing table read function using netlink interface. Only called
748 bootstrap time. */
749 int netlink_route_read(struct zebra_ns *zns)
750 {
751 int ret;
752
753 /* Get IPv4 routing table. */
754 ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
755 if (ret < 0)
756 return ret;
757 ret = netlink_parse_info(netlink_route_change_read_unicast,
758 &zns->netlink_cmd, zns, 0, 1);
759 if (ret < 0)
760 return ret;
761
762 /* Get IPv6 routing table. */
763 ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
764 if (ret < 0)
765 return ret;
766 ret = netlink_parse_info(netlink_route_change_read_unicast,
767 &zns->netlink_cmd, zns, 0, 1);
768 if (ret < 0)
769 return ret;
770
771 return 0;
772 }
773
774 static void _netlink_route_nl_add_gateway_info(u_char route_family,
775 u_char gw_family,
776 struct nlmsghdr *nlmsg,
777 size_t req_size, int bytelen,
778 struct nexthop *nexthop)
779 {
780 if (route_family == AF_MPLS) {
781 struct gw_family_t gw_fam;
782
783 gw_fam.family = gw_family;
784 if (gw_family == AF_INET)
785 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
786 else
787 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
788 addattr_l(nlmsg, req_size, RTA_VIA, &gw_fam.family,
789 bytelen + 2);
790 } else {
791 if (gw_family == AF_INET)
792 addattr_l(nlmsg, req_size, RTA_GATEWAY,
793 &nexthop->gate.ipv4, bytelen);
794 else
795 addattr_l(nlmsg, req_size, RTA_GATEWAY,
796 &nexthop->gate.ipv6, bytelen);
797 }
798 }
799
800 static void _netlink_route_rta_add_gateway_info(u_char route_family,
801 u_char gw_family,
802 struct rtattr *rta,
803 struct rtnexthop *rtnh,
804 size_t req_size, int bytelen,
805 struct nexthop *nexthop)
806 {
807 if (route_family == AF_MPLS) {
808 struct gw_family_t gw_fam;
809
810 gw_fam.family = gw_family;
811 if (gw_family == AF_INET)
812 memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
813 else
814 memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
815 rta_addattr_l(rta, req_size, RTA_VIA, &gw_fam.family,
816 bytelen + 2);
817 rtnh->rtnh_len += RTA_LENGTH(bytelen + 2);
818 } else {
819 if (gw_family == AF_INET)
820 rta_addattr_l(rta, req_size, RTA_GATEWAY,
821 &nexthop->gate.ipv4, bytelen);
822 else
823 rta_addattr_l(rta, req_size, RTA_GATEWAY,
824 &nexthop->gate.ipv6, bytelen);
825 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
826 }
827 }
828
829 /* This function takes a nexthop as argument and adds
830 * the appropriate netlink attributes to an existing
831 * netlink message.
832 *
833 * @param routedesc: Human readable description of route type
834 * (direct/recursive, single-/multipath)
835 * @param bytelen: Length of addresses in bytes.
836 * @param nexthop: Nexthop information
837 * @param nlmsg: nlmsghdr structure to fill in.
838 * @param req_size: The size allocated for the message.
839 */
840 static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
841 struct nexthop *nexthop,
842 struct nlmsghdr *nlmsg,
843 struct rtmsg *rtmsg,
844 size_t req_size, int cmd)
845 {
846 struct nexthop_label *nh_label;
847 mpls_lse_t out_lse[MPLS_MAX_LABELS];
848 char label_buf[100];
849
850 /*
851 * label_buf is *only* currently used within debugging.
852 * As such when we assign it we are guarding it inside
853 * a debug test. If you want to change this make sure
854 * you fix this assumption
855 */
856 label_buf[0] = '\0';
857 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
858 * (in the case of LER)
859 */
860 nh_label = nexthop->nh_label;
861 if (rtmsg->rtm_family == AF_MPLS) {
862 assert(nh_label);
863 assert(nh_label->num_labels == 1);
864 }
865
866 if (nh_label && nh_label->num_labels) {
867 int i, num_labels = 0;
868 u_int32_t bos;
869 char label_buf1[20];
870
871 for (i = 0; i < nh_label->num_labels; i++) {
872 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
873 bos = ((i == (nh_label->num_labels - 1)) ? 1
874 : 0);
875 out_lse[i] = mpls_lse_encode(nh_label->label[i],
876 0, 0, bos);
877 if (IS_ZEBRA_DEBUG_KERNEL) {
878 if (!num_labels)
879 sprintf(label_buf, "label %d",
880 nh_label->label[i]);
881 else {
882 sprintf(label_buf1, "/%d",
883 nh_label->label[i]);
884 strcat(label_buf, label_buf1);
885 }
886 }
887 num_labels++;
888 }
889 }
890 if (num_labels) {
891 if (rtmsg->rtm_family == AF_MPLS)
892 addattr_l(nlmsg, req_size, RTA_NEWDST, &out_lse,
893 num_labels * sizeof(mpls_lse_t));
894 else {
895 struct rtattr *nest;
896 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
897
898 addattr_l(nlmsg, req_size, RTA_ENCAP_TYPE,
899 &encap, sizeof(u_int16_t));
900 nest = addattr_nest(nlmsg, req_size, RTA_ENCAP);
901 addattr_l(nlmsg, req_size, MPLS_IPTUNNEL_DST,
902 &out_lse,
903 num_labels * sizeof(mpls_lse_t));
904 addattr_nest_end(nlmsg, nest);
905 }
906 }
907 }
908
909 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
910 rtmsg->rtm_flags |= RTNH_F_ONLINK;
911
912 if (rtmsg->rtm_family == AF_INET
913 && (nexthop->type == NEXTHOP_TYPE_IPV6
914 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
915 rtmsg->rtm_flags |= RTNH_F_ONLINK;
916 addattr_l(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4);
917 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
918
919 if (nexthop->rmap_src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
920 addattr_l(nlmsg, req_size, RTA_PREFSRC,
921 &nexthop->rmap_src.ipv4, bytelen);
922 else if (nexthop->src.ipv4.s_addr && (cmd == RTM_NEWROUTE))
923 addattr_l(nlmsg, req_size, RTA_PREFSRC,
924 &nexthop->src.ipv4, bytelen);
925
926 if (IS_ZEBRA_DEBUG_KERNEL)
927 zlog_debug(
928 " 5549: _netlink_route_build_singlepath() (%s): "
929 "nexthop via %s %s if %u",
930 routedesc, ipv4_ll_buf, label_buf,
931 nexthop->ifindex);
932 return;
933 }
934
935 if (nexthop->type == NEXTHOP_TYPE_IPV4
936 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
937 /* Send deletes to the kernel without specifying the next-hop */
938 if (cmd != RTM_DELROUTE)
939 _netlink_route_nl_add_gateway_info(
940 rtmsg->rtm_family, AF_INET, nlmsg, req_size,
941 bytelen, nexthop);
942
943 if (cmd == RTM_NEWROUTE) {
944 if (nexthop->rmap_src.ipv4.s_addr)
945 addattr_l(nlmsg, req_size, RTA_PREFSRC,
946 &nexthop->rmap_src.ipv4, bytelen);
947 else if (nexthop->src.ipv4.s_addr)
948 addattr_l(nlmsg, req_size, RTA_PREFSRC,
949 &nexthop->src.ipv4, bytelen);
950 }
951
952 if (IS_ZEBRA_DEBUG_KERNEL)
953 zlog_debug(
954 "netlink_route_multipath() (%s): "
955 "nexthop via %s %s if %u",
956 routedesc, inet_ntoa(nexthop->gate.ipv4),
957 label_buf, nexthop->ifindex);
958 }
959
960 if (nexthop->type == NEXTHOP_TYPE_IPV6
961 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
962 _netlink_route_nl_add_gateway_info(rtmsg->rtm_family, AF_INET6,
963 nlmsg, req_size, bytelen,
964 nexthop);
965
966 if (cmd == RTM_NEWROUTE) {
967 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
968 addattr_l(nlmsg, req_size, RTA_PREFSRC,
969 &nexthop->rmap_src.ipv6, bytelen);
970 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
971 addattr_l(nlmsg, req_size, RTA_PREFSRC,
972 &nexthop->src.ipv6, bytelen);
973 }
974
975 if (IS_ZEBRA_DEBUG_KERNEL)
976 zlog_debug(
977 "netlink_route_multipath() (%s): "
978 "nexthop via %s %s if %u",
979 routedesc, inet6_ntoa(nexthop->gate.ipv6),
980 label_buf, nexthop->ifindex);
981 }
982 if (nexthop->type == NEXTHOP_TYPE_IFINDEX
983 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
984 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
985
986 if (cmd == RTM_NEWROUTE) {
987 if (nexthop->rmap_src.ipv4.s_addr)
988 addattr_l(nlmsg, req_size, RTA_PREFSRC,
989 &nexthop->rmap_src.ipv4, bytelen);
990 else if (nexthop->src.ipv4.s_addr)
991 addattr_l(nlmsg, req_size, RTA_PREFSRC,
992 &nexthop->src.ipv4, bytelen);
993 }
994
995 if (IS_ZEBRA_DEBUG_KERNEL)
996 zlog_debug(
997 "netlink_route_multipath() (%s): "
998 "nexthop via if %u",
999 routedesc, nexthop->ifindex);
1000 }
1001
1002 if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1003 addattr32(nlmsg, req_size, RTA_OIF, nexthop->ifindex);
1004
1005 if (cmd == RTM_NEWROUTE) {
1006 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1007 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1008 &nexthop->rmap_src.ipv6, bytelen);
1009 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1010 addattr_l(nlmsg, req_size, RTA_PREFSRC,
1011 &nexthop->src.ipv6, bytelen);
1012 }
1013
1014 if (IS_ZEBRA_DEBUG_KERNEL)
1015 zlog_debug(
1016 "netlink_route_multipath() (%s): "
1017 "nexthop via if %u",
1018 routedesc, nexthop->ifindex);
1019 }
1020 }
1021
1022 /* This function takes a nexthop as argument and
1023 * appends to the given rtattr/rtnexthop pair the
1024 * representation of the nexthop. If the nexthop
1025 * defines a preferred source, the src parameter
1026 * will be modified to point to that src, otherwise
1027 * it will be kept unmodified.
1028 *
1029 * @param routedesc: Human readable description of route type
1030 * (direct/recursive, single-/multipath)
1031 * @param bytelen: Length of addresses in bytes.
1032 * @param nexthop: Nexthop information
1033 * @param rta: rtnetlink attribute structure
1034 * @param rtnh: pointer to an rtnetlink nexthop structure
1035 * @param src: pointer pointing to a location where
1036 * the prefsrc should be stored.
1037 */
1038 static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
1039 struct nexthop *nexthop,
1040 struct rtattr *rta,
1041 struct rtnexthop *rtnh,
1042 struct rtmsg *rtmsg,
1043 union g_addr **src)
1044 {
1045 struct nexthop_label *nh_label;
1046 mpls_lse_t out_lse[MPLS_MAX_LABELS];
1047 char label_buf[100];
1048
1049 rtnh->rtnh_len = sizeof(*rtnh);
1050 rtnh->rtnh_flags = 0;
1051 rtnh->rtnh_hops = 0;
1052 rta->rta_len += rtnh->rtnh_len;
1053
1054 /*
1055 * label_buf is *only* currently used within debugging.
1056 * As such when we assign it we are guarding it inside
1057 * a debug test. If you want to change this make sure
1058 * you fix this assumption
1059 */
1060 label_buf[0] = '\0';
1061 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
1062 * (in the case of LER)
1063 */
1064 nh_label = nexthop->nh_label;
1065 if (rtmsg->rtm_family == AF_MPLS) {
1066 assert(nh_label);
1067 assert(nh_label->num_labels == 1);
1068 }
1069
1070 if (nh_label && nh_label->num_labels) {
1071 int i, num_labels = 0;
1072 u_int32_t bos;
1073 char label_buf1[20];
1074
1075 for (i = 0; i < nh_label->num_labels; i++) {
1076 if (nh_label->label[i] != MPLS_IMP_NULL_LABEL) {
1077 bos = ((i == (nh_label->num_labels - 1)) ? 1
1078 : 0);
1079 out_lse[i] = mpls_lse_encode(nh_label->label[i],
1080 0, 0, bos);
1081 if (IS_ZEBRA_DEBUG_KERNEL) {
1082 if (!num_labels)
1083 sprintf(label_buf, "label %d",
1084 nh_label->label[i]);
1085 else {
1086 sprintf(label_buf1, "/%d",
1087 nh_label->label[i]);
1088 strcat(label_buf, label_buf1);
1089 }
1090 }
1091 num_labels++;
1092 }
1093 }
1094 if (num_labels) {
1095 if (rtmsg->rtm_family == AF_MPLS) {
1096 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_NEWDST,
1097 &out_lse,
1098 num_labels * sizeof(mpls_lse_t));
1099 rtnh->rtnh_len += RTA_LENGTH(
1100 num_labels * sizeof(mpls_lse_t));
1101 } else {
1102 struct rtattr *nest;
1103 u_int16_t encap = LWTUNNEL_ENCAP_MPLS;
1104 int len = rta->rta_len;
1105
1106 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1107 RTA_ENCAP_TYPE, &encap,
1108 sizeof(u_int16_t));
1109 nest = rta_nest(rta, NL_PKT_BUF_SIZE,
1110 RTA_ENCAP);
1111 rta_addattr_l(rta, NL_PKT_BUF_SIZE,
1112 MPLS_IPTUNNEL_DST, &out_lse,
1113 num_labels * sizeof(mpls_lse_t));
1114 rta_nest_end(rta, nest);
1115 rtnh->rtnh_len += rta->rta_len - len;
1116 }
1117 }
1118 }
1119
1120 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1121 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1122
1123 if (rtmsg->rtm_family == AF_INET
1124 && (nexthop->type == NEXTHOP_TYPE_IPV6
1125 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)) {
1126 bytelen = 4;
1127 rtnh->rtnh_flags |= RTNH_F_ONLINK;
1128 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTA_GATEWAY, &ipv4_ll,
1129 bytelen);
1130 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
1131 rtnh->rtnh_ifindex = nexthop->ifindex;
1132
1133 if (nexthop->rmap_src.ipv4.s_addr)
1134 *src = &nexthop->rmap_src;
1135 else if (nexthop->src.ipv4.s_addr)
1136 *src = &nexthop->src;
1137
1138 if (IS_ZEBRA_DEBUG_KERNEL)
1139 zlog_debug(
1140 " 5549: netlink_route_build_multipath() (%s): "
1141 "nexthop via %s %s if %u",
1142 routedesc, ipv4_ll_buf, label_buf,
1143 nexthop->ifindex);
1144 return;
1145 }
1146
1147 if (nexthop->type == NEXTHOP_TYPE_IPV4
1148 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1149 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET,
1150 rta, rtnh, NL_PKT_BUF_SIZE,
1151 bytelen, nexthop);
1152 if (nexthop->rmap_src.ipv4.s_addr)
1153 *src = &nexthop->rmap_src;
1154 else if (nexthop->src.ipv4.s_addr)
1155 *src = &nexthop->src;
1156
1157 if (IS_ZEBRA_DEBUG_KERNEL)
1158 zlog_debug(
1159 "netlink_route_multipath() (%s): "
1160 "nexthop via %s %s if %u",
1161 routedesc, inet_ntoa(nexthop->gate.ipv4),
1162 label_buf, nexthop->ifindex);
1163 }
1164 if (nexthop->type == NEXTHOP_TYPE_IPV6
1165 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1166 _netlink_route_rta_add_gateway_info(rtmsg->rtm_family, AF_INET6,
1167 rta, rtnh, NL_PKT_BUF_SIZE,
1168 bytelen, nexthop);
1169
1170 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1171 *src = &nexthop->rmap_src;
1172 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1173 *src = &nexthop->src;
1174
1175 if (IS_ZEBRA_DEBUG_KERNEL)
1176 zlog_debug(
1177 "netlink_route_multipath() (%s): "
1178 "nexthop via %s %s if %u",
1179 routedesc, inet6_ntoa(nexthop->gate.ipv6),
1180 label_buf, nexthop->ifindex);
1181 }
1182 /* ifindex */
1183 if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
1184 || nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1185 rtnh->rtnh_ifindex = nexthop->ifindex;
1186
1187 if (nexthop->rmap_src.ipv4.s_addr)
1188 *src = &nexthop->rmap_src;
1189 else if (nexthop->src.ipv4.s_addr)
1190 *src = &nexthop->src;
1191
1192 if (IS_ZEBRA_DEBUG_KERNEL)
1193 zlog_debug(
1194 "netlink_route_multipath() (%s): "
1195 "nexthop via if %u",
1196 routedesc, nexthop->ifindex);
1197 } else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1198 rtnh->rtnh_ifindex = nexthop->ifindex;
1199
1200 if (IS_ZEBRA_DEBUG_KERNEL)
1201 zlog_debug(
1202 "netlink_route_multipath() (%s): "
1203 "nexthop via if %u",
1204 routedesc, nexthop->ifindex);
1205 } else {
1206 rtnh->rtnh_ifindex = 0;
1207 }
1208 }
1209
1210 static inline void _netlink_mpls_build_singlepath(const char *routedesc,
1211 zebra_nhlfe_t *nhlfe,
1212 struct nlmsghdr *nlmsg,
1213 struct rtmsg *rtmsg,
1214 size_t req_size, int cmd)
1215 {
1216 int bytelen;
1217 u_char family;
1218
1219 family = NHLFE_FAMILY(nhlfe);
1220 bytelen = (family == AF_INET ? 4 : 16);
1221 _netlink_route_build_singlepath(routedesc, bytelen, nhlfe->nexthop,
1222 nlmsg, rtmsg, req_size, cmd);
1223 }
1224
1225
1226 static inline void
1227 _netlink_mpls_build_multipath(const char *routedesc, zebra_nhlfe_t *nhlfe,
1228 struct rtattr *rta, struct rtnexthop *rtnh,
1229 struct rtmsg *rtmsg, union g_addr **src)
1230 {
1231 int bytelen;
1232 u_char family;
1233
1234 family = NHLFE_FAMILY(nhlfe);
1235 bytelen = (family == AF_INET ? 4 : 16);
1236 _netlink_route_build_multipath(routedesc, bytelen, nhlfe->nexthop, rta,
1237 rtnh, rtmsg, src);
1238 }
1239
1240
1241 /* Log debug information for netlink_route_multipath
1242 * if debug logging is enabled.
1243 *
1244 * @param cmd: Netlink command which is to be processed
1245 * @param p: Prefix for which the change is due
1246 * @param nexthop: Nexthop which is currently processed
1247 * @param routedesc: Semantic annotation for nexthop
1248 * (recursive, multipath, etc.)
1249 * @param family: Address family which the change concerns
1250 */
1251 static void _netlink_route_debug(int cmd, struct prefix *p,
1252 struct nexthop *nexthop, const char *routedesc,
1253 int family, struct zebra_vrf *zvrf)
1254 {
1255 if (IS_ZEBRA_DEBUG_KERNEL) {
1256 char buf[PREFIX_STRLEN];
1257 zlog_debug(
1258 "netlink_route_multipath() (%s): %s %s vrf %u type %s",
1259 routedesc, nl_msg_type_to_str(cmd),
1260 prefix2str(p, buf, sizeof(buf)), zvrf_id(zvrf),
1261 (nexthop) ? nexthop_type_to_str(nexthop->type) : "UNK");
1262 }
1263 }
1264
1265 static void _netlink_mpls_debug(int cmd, u_int32_t label, const char *routedesc)
1266 {
1267 if (IS_ZEBRA_DEBUG_KERNEL)
1268 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc,
1269 nl_msg_type_to_str(cmd), label);
1270 }
1271
1272 static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla,
1273 int llalen)
1274 {
1275 struct {
1276 struct nlmsghdr n;
1277 struct ndmsg ndm;
1278 char buf[256];
1279 } req;
1280
1281 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1282
1283 memset(&req.n, 0, sizeof(req.n));
1284 memset(&req.ndm, 0, sizeof(req.ndm));
1285
1286 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1287 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1288 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
1289 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1290
1291 req.ndm.ndm_family = AF_INET;
1292 req.ndm.ndm_state = NUD_PERMANENT;
1293 req.ndm.ndm_ifindex = ifindex;
1294 req.ndm.ndm_type = RTN_UNICAST;
1295
1296 addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
1297 addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);
1298
1299 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1300 0);
1301 }
1302
1303 /* Routing table change via netlink interface. */
1304 /* Update flag indicates whether this is a "replace" or not. */
1305 static int netlink_route_multipath(int cmd, struct prefix *p,
1306 struct prefix *src_p, struct route_entry *re,
1307 int update)
1308 {
1309 int bytelen;
1310 struct sockaddr_nl snl;
1311 struct nexthop *nexthop = NULL;
1312 unsigned int nexthop_num;
1313 int discard = 0;
1314 int family = PREFIX_FAMILY(p);
1315 const char *routedesc;
1316 int setsrc = 0;
1317 union g_addr src;
1318
1319 struct {
1320 struct nlmsghdr n;
1321 struct rtmsg r;
1322 char buf[NL_PKT_BUF_SIZE];
1323 } req;
1324
1325 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1326 struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
1327
1328 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
1329
1330 bytelen = (family == AF_INET ? 4 : 16);
1331
1332 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1333 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1334 if ((cmd == RTM_NEWROUTE) && update)
1335 req.n.nlmsg_flags |= NLM_F_REPLACE;
1336 req.n.nlmsg_type = cmd;
1337 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1338
1339 req.r.rtm_family = family;
1340 req.r.rtm_dst_len = p->prefixlen;
1341 req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
1342 req.r.rtm_protocol = zebra2proto(re->type);
1343 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1344 req.r.rtm_type = RTN_UNICAST;
1345
1346 addattr_l(&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
1347 if (src_p)
1348 addattr_l(&req.n, sizeof req, RTA_SRC, &src_p->u.prefix,
1349 bytelen);
1350
1351 /* Metric. */
1352 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1353 * used
1354 * either by the kernel or by zebra. Its purely for calculating best
1355 * path(s)
1356 * by the routing protocol and for communicating with protocol peers.
1357 */
1358 addattr32(&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
1359
1360 /* Table corresponding to this route. */
1361 if (re->table < 256)
1362 req.r.rtm_table = re->table;
1363 else {
1364 req.r.rtm_table = RT_TABLE_UNSPEC;
1365 addattr32(&req.n, sizeof req, RTA_TABLE, re->table);
1366 }
1367
1368 if (discard)
1369 goto skip;
1370
1371 if (re->mtu || re->nexthop_mtu) {
1372 char buf[NL_PKT_BUF_SIZE];
1373 struct rtattr *rta = (void *)buf;
1374 u_int32_t mtu = re->mtu;
1375 if (!mtu || (re->nexthop_mtu && re->nexthop_mtu < mtu))
1376 mtu = re->nexthop_mtu;
1377 rta->rta_type = RTA_METRICS;
1378 rta->rta_len = RTA_LENGTH(0);
1379 rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
1380 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
1381 RTA_PAYLOAD(rta));
1382 }
1383
1384 /* Count overall nexthops so we can decide whether to use singlepath
1385 * or multipath case. */
1386 nexthop_num = 0;
1387 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1388 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1389 continue;
1390 if (cmd == RTM_NEWROUTE
1391 && !NEXTHOP_IS_ACTIVE(nexthop->flags))
1392 continue;
1393 if (cmd == RTM_DELROUTE
1394 && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
1395 continue;
1396
1397 nexthop_num++;
1398 }
1399
1400 /* Singlepath case. */
1401 if (nexthop_num == 1 || multipath_num == 1) {
1402 nexthop_num = 0;
1403 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1404 /*
1405 * So we want to cover 2 types of blackhole
1406 * routes here:
1407 * 1) A normal blackhole route( ala from a static
1408 * install.
1409 * 2) A recursively resolved blackhole route
1410 */
1411 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1412 switch (nexthop->bh_type) {
1413 case BLACKHOLE_ADMINPROHIB:
1414 req.r.rtm_type = RTN_PROHIBIT;
1415 break;
1416 case BLACKHOLE_REJECT:
1417 req.r.rtm_type = RTN_UNREACHABLE;
1418 break;
1419 default:
1420 req.r.rtm_type = RTN_BLACKHOLE;
1421 break;
1422 }
1423 goto skip;
1424 }
1425 if (CHECK_FLAG(nexthop->flags,
1426 NEXTHOP_FLAG_RECURSIVE)) {
1427 if (!setsrc) {
1428 if (family == AF_INET) {
1429 if (nexthop->rmap_src.ipv4
1430 .s_addr
1431 != 0) {
1432 src.ipv4 =
1433 nexthop->rmap_src
1434 .ipv4;
1435 setsrc = 1;
1436 } else if (nexthop->src.ipv4
1437 .s_addr
1438 != 0) {
1439 src.ipv4 =
1440 nexthop->src
1441 .ipv4;
1442 setsrc = 1;
1443 }
1444 } else if (family == AF_INET6) {
1445 if (!IN6_IS_ADDR_UNSPECIFIED(
1446 &nexthop->rmap_src
1447 .ipv6)) {
1448 src.ipv6 =
1449 nexthop->rmap_src
1450 .ipv6;
1451 setsrc = 1;
1452 } else if (
1453 !IN6_IS_ADDR_UNSPECIFIED(
1454 &nexthop->src
1455 .ipv6)) {
1456 src.ipv6 =
1457 nexthop->src
1458 .ipv6;
1459 setsrc = 1;
1460 }
1461 }
1462 }
1463 continue;
1464 }
1465
1466 if ((cmd == RTM_NEWROUTE
1467 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1468 || (cmd == RTM_DELROUTE
1469 && CHECK_FLAG(nexthop->flags,
1470 NEXTHOP_FLAG_FIB))) {
1471 routedesc = nexthop->rparent
1472 ? "recursive, single-path"
1473 : "single-path";
1474
1475 _netlink_route_debug(cmd, p, nexthop, routedesc,
1476 family, zvrf);
1477 _netlink_route_build_singlepath(
1478 routedesc, bytelen, nexthop, &req.n,
1479 &req.r, sizeof req, cmd);
1480 nexthop_num++;
1481 break;
1482 }
1483 }
1484 if (setsrc && (cmd == RTM_NEWROUTE)) {
1485 if (family == AF_INET)
1486 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1487 &src.ipv4, bytelen);
1488 else if (family == AF_INET6)
1489 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1490 &src.ipv6, bytelen);
1491 }
1492 } else {
1493 char buf[NL_PKT_BUF_SIZE];
1494 struct rtattr *rta = (void *)buf;
1495 struct rtnexthop *rtnh;
1496 union g_addr *src1 = NULL;
1497
1498 rta->rta_type = RTA_MULTIPATH;
1499 rta->rta_len = RTA_LENGTH(0);
1500 rtnh = RTA_DATA(rta);
1501
1502 nexthop_num = 0;
1503 for (ALL_NEXTHOPS(re->nexthop, nexthop)) {
1504 if (nexthop_num >= multipath_num)
1505 break;
1506
1507 if (CHECK_FLAG(nexthop->flags,
1508 NEXTHOP_FLAG_RECURSIVE)) {
1509 /* This only works for IPv4 now */
1510 if (!setsrc) {
1511 if (family == AF_INET) {
1512 if (nexthop->rmap_src.ipv4
1513 .s_addr
1514 != 0) {
1515 src.ipv4 =
1516 nexthop->rmap_src
1517 .ipv4;
1518 setsrc = 1;
1519 } else if (nexthop->src.ipv4
1520 .s_addr
1521 != 0) {
1522 src.ipv4 =
1523 nexthop->src
1524 .ipv4;
1525 setsrc = 1;
1526 }
1527 } else if (family == AF_INET6) {
1528 if (!IN6_IS_ADDR_UNSPECIFIED(
1529 &nexthop->rmap_src
1530 .ipv6)) {
1531 src.ipv6 =
1532 nexthop->rmap_src
1533 .ipv6;
1534 setsrc = 1;
1535 } else if (
1536 !IN6_IS_ADDR_UNSPECIFIED(
1537 &nexthop->src
1538 .ipv6)) {
1539 src.ipv6 =
1540 nexthop->src
1541 .ipv6;
1542 setsrc = 1;
1543 }
1544 }
1545 }
1546 continue;
1547 }
1548
1549 if ((cmd == RTM_NEWROUTE
1550 && NEXTHOP_IS_ACTIVE(nexthop->flags))
1551 || (cmd == RTM_DELROUTE
1552 && CHECK_FLAG(nexthop->flags,
1553 NEXTHOP_FLAG_FIB))) {
1554 routedesc = nexthop->rparent
1555 ? "recursive, multipath"
1556 : "multipath";
1557 nexthop_num++;
1558
1559 _netlink_route_debug(cmd, p, nexthop, routedesc,
1560 family, zvrf);
1561 _netlink_route_build_multipath(
1562 routedesc, bytelen, nexthop, rta, rtnh,
1563 &req.r, &src1);
1564 rtnh = RTNH_NEXT(rtnh);
1565
1566 if (!setsrc && src1) {
1567 if (family == AF_INET)
1568 src.ipv4 = src1->ipv4;
1569 else if (family == AF_INET6)
1570 src.ipv6 = src1->ipv6;
1571
1572 setsrc = 1;
1573 }
1574 }
1575 }
1576 if (setsrc && (cmd == RTM_NEWROUTE)) {
1577 if (family == AF_INET)
1578 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1579 &src.ipv4, bytelen);
1580 else if (family == AF_INET6)
1581 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1582 &src.ipv6, bytelen);
1583 if (IS_ZEBRA_DEBUG_KERNEL)
1584 zlog_debug("Setting source");
1585 }
1586
1587 if (rta->rta_len > RTA_LENGTH(0))
1588 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
1589 RTA_DATA(rta), RTA_PAYLOAD(rta));
1590 }
1591
1592 /* If there is no useful nexthop then return. */
1593 if (nexthop_num == 0) {
1594 if (IS_ZEBRA_DEBUG_KERNEL)
1595 zlog_debug(
1596 "netlink_route_multipath(): No useful nexthop.");
1597 return 0;
1598 }
1599
1600 skip:
1601
1602 /* Destination netlink address. */
1603 memset(&snl, 0, sizeof snl);
1604 snl.nl_family = AF_NETLINK;
1605
1606 /* Talk to netlink socket. */
1607 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1608 0);
1609 }
1610
1611 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
1612 {
1613 int suc = 0;
1614 struct mcast_route_data *mr = (struct mcast_route_data *)in;
1615 struct {
1616 struct nlmsghdr n;
1617 struct ndmsg ndm;
1618 char buf[256];
1619 } req;
1620
1621 mroute = mr;
1622 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1623
1624 memset(&req.n, 0, sizeof(req.n));
1625 memset(&req.ndm, 0, sizeof(req.ndm));
1626
1627 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1628 req.n.nlmsg_flags = NLM_F_REQUEST;
1629 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
1630
1631 req.ndm.ndm_family = RTNL_FAMILY_IPMR;
1632 req.n.nlmsg_type = RTM_GETROUTE;
1633
1634 addattr_l(&req.n, sizeof(req), RTA_IIF, &mroute->ifindex, 4);
1635 addattr_l(&req.n, sizeof(req), RTA_OIF, &mroute->ifindex, 4);
1636 addattr_l(&req.n, sizeof(req), RTA_SRC, &mroute->sg.src.s_addr, 4);
1637 addattr_l(&req.n, sizeof(req), RTA_DST, &mroute->sg.grp.s_addr, 4);
1638 addattr_l(&req.n, sizeof(req), RTA_TABLE, &zvrf->table_id, 4);
1639
1640 suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
1641 &zns->netlink_cmd, zns, 0);
1642
1643 mroute = NULL;
1644 return suc;
1645 }
1646
1647 int kernel_route_rib(struct prefix *p, struct prefix *src_p,
1648 struct route_entry *old, struct route_entry *new)
1649 {
1650 assert(old || new);
1651
1652 if (!old && new)
1653 return netlink_route_multipath(RTM_NEWROUTE, p, src_p, new, 0);
1654 if (old && !new)
1655 return netlink_route_multipath(RTM_DELROUTE, p, src_p, old, 0);
1656
1657 return netlink_route_multipath(RTM_NEWROUTE, p, src_p, new, 1);
1658 }
1659
1660 int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
1661 int llalen)
1662 {
1663 return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
1664 addr, lla, llalen);
1665 }
1666
1667 /*
1668 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
1669 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
1670 */
1671 static int netlink_vxlan_flood_list_update(struct interface *ifp,
1672 struct in_addr *vtep_ip, int cmd)
1673 {
1674 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1675 struct {
1676 struct nlmsghdr n;
1677 struct ndmsg ndm;
1678 char buf[256];
1679 } req;
1680 u_char dst_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1681
1682 memset(&req.n, 0, sizeof(req.n));
1683 memset(&req.ndm, 0, sizeof(req.ndm));
1684
1685 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
1686 req.n.nlmsg_flags = NLM_F_REQUEST;
1687 if (cmd == RTM_NEWNEIGH)
1688 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_APPEND);
1689 req.n.nlmsg_type = cmd;
1690 req.ndm.ndm_family = PF_BRIDGE;
1691 req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
1692 req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
1693
1694
1695 addattr_l(&req.n, sizeof(req), NDA_LLADDR, &dst_mac, 6);
1696 req.ndm.ndm_ifindex = ifp->ifindex;
1697 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip->s_addr, 4);
1698
1699 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
1700 0);
1701 }
1702
1703 /*
1704 * Add remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1705 * adding
1706 * a "flood" MAC FDB entry.
1707 */
1708 int kernel_add_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1709 {
1710 if (IS_ZEBRA_DEBUG_VXLAN)
1711 zlog_debug("Install %s into flood list for VNI %u intf %s(%u)",
1712 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1713
1714 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_NEWNEIGH);
1715 }
1716
1717 /*
1718 * Remove remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1719 * deleting the "flood" MAC FDB entry.
1720 */
1721 int kernel_del_vtep(vni_t vni, struct interface *ifp, struct in_addr *vtep_ip)
1722 {
1723 if (IS_ZEBRA_DEBUG_VXLAN)
1724 zlog_debug(
1725 "Uninstall %s from flood list for VNI %u intf %s(%u)",
1726 inet_ntoa(*vtep_ip), vni, ifp->name, ifp->ifindex);
1727
1728 return netlink_vxlan_flood_list_update(ifp, vtep_ip, RTM_DELNEIGH);
1729 }
1730
/* Fallback used when the included headers do not provide NDA_RTA: yields
 * the first rtattr located after the aligned ndmsg header at `r`.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif /* NDA_RTA */
1735
1736 static int netlink_macfdb_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
1737 int len)
1738 {
1739 struct ndmsg *ndm;
1740 struct interface *ifp;
1741 struct zebra_if *zif;
1742 struct zebra_vrf *zvrf;
1743 struct rtattr *tb[NDA_MAX + 1];
1744 struct interface *br_if;
1745 struct ethaddr mac;
1746 vlanid_t vid = 0;
1747 struct prefix vtep_ip;
1748 int vid_present = 0, dst_present = 0;
1749 char buf[ETHER_ADDR_STRLEN];
1750 char vid_buf[20];
1751 char dst_buf[30];
1752 u_char sticky = 0;
1753
1754 ndm = NLMSG_DATA(h);
1755
1756 /* The interface should exist. */
1757 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
1758 ndm->ndm_ifindex);
1759 if (!ifp)
1760 return 0;
1761
1762 /* Locate VRF corresponding to interface. We only process MAC
1763 * notifications
1764 * if EVPN is enabled on this VRF.
1765 */
1766 zvrf = vrf_info_lookup(ifp->vrf_id);
1767 if (!zvrf || !EVPN_ENABLED(zvrf))
1768 return 0;
1769 if (!ifp->info)
1770 return 0;
1771
1772 /* The interface should be something we're interested in. */
1773 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
1774 return 0;
1775
1776 /* Drop "permanent" entries. */
1777 if (ndm->ndm_state & NUD_PERMANENT)
1778 return 0;
1779
1780 zif = (struct zebra_if *)ifp->info;
1781 if ((br_if = zif->brslave_info.br_if) == NULL) {
1782 zlog_warn("%s family %s IF %s(%u) brIF %u - no bridge master",
1783 nl_msg_type_to_str(h->nlmsg_type),
1784 nl_family_to_str(ndm->ndm_family), ifp->name,
1785 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1786 return 0;
1787 }
1788
1789 /* Parse attributes and extract fields of interest. */
1790 memset(tb, 0, sizeof tb);
1791 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
1792
1793 if (!tb[NDA_LLADDR]) {
1794 zlog_warn("%s family %s IF %s(%u) brIF %u - no LLADDR",
1795 nl_msg_type_to_str(h->nlmsg_type),
1796 nl_family_to_str(ndm->ndm_family), ifp->name,
1797 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex);
1798 return 0;
1799 }
1800
1801 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
1802 zlog_warn(
1803 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
1804 nl_msg_type_to_str(h->nlmsg_type),
1805 nl_family_to_str(ndm->ndm_family), ifp->name,
1806 ndm->ndm_ifindex, zif->brslave_info.bridge_ifindex,
1807 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
1808 return 0;
1809 }
1810
1811 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
1812
1813 if ((NDA_VLAN <= NDA_MAX) && tb[NDA_VLAN]) {
1814 vid_present = 1;
1815 vid = *(u_int16_t *)RTA_DATA(tb[NDA_VLAN]);
1816 sprintf(vid_buf, " VLAN %u", vid);
1817 }
1818
1819 if (tb[NDA_DST]) {
1820 /* TODO: Only IPv4 supported now. */
1821 dst_present = 1;
1822 vtep_ip.family = AF_INET;
1823 vtep_ip.prefixlen = IPV4_MAX_BITLEN;
1824 memcpy(&(vtep_ip.u.prefix4.s_addr), RTA_DATA(tb[NDA_DST]),
1825 IPV4_MAX_BYTELEN);
1826 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip.u.prefix4));
1827 }
1828
1829 sticky = (ndm->ndm_state & NUD_NOARP) ? 1 : 0;
1830
1831 if (IS_ZEBRA_DEBUG_KERNEL)
1832 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
1833 nl_msg_type_to_str(h->nlmsg_type),
1834 nl_family_to_str(ndm->ndm_family), ifp->name,
1835 ndm->ndm_ifindex, vid_present ? vid_buf : "",
1836 sticky ? "sticky " : "",
1837 prefix_mac2str(&mac, buf, sizeof(buf)),
1838 dst_present ? dst_buf : "");
1839
1840 if (filter_vlan && vid != filter_vlan)
1841 return 0;
1842
1843 /* If add or update, do accordingly if learnt on a "local" interface; if
1844 * the notification is over VxLAN, this has to be related to
1845 * multi-homing,
1846 * so perform an implicit delete of any local entry (if it exists).
1847 */
1848 if (h->nlmsg_type == RTM_NEWNEIGH) {
1849 /* Drop "permanent" entries. */
1850 if (ndm->ndm_state & NUD_PERMANENT)
1851 return 0;
1852
1853 if (IS_ZEBRA_IF_VXLAN(ifp))
1854 return zebra_vxlan_check_del_local_mac(ifp, br_if, &mac,
1855 vid);
1856
1857 return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
1858 sticky);
1859 }
1860
1861 /* This is a delete notification.
1862 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
1863 * 2. For a MAC over "local" interface, delete the mac
1864 * Note: We will get notifications from both bridge driver and VxLAN
1865 * driver.
1866 * Ignore the notification from VxLan driver as it is also generated
1867 * when mac moves from remote to local.
1868 */
1869 if (dst_present)
1870 return 0;
1871
1872 if (IS_ZEBRA_IF_VXLAN(ifp))
1873 return zebra_vxlan_check_readd_remote_mac(ifp, br_if, &mac,
1874 vid);
1875
1876 return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
1877 }
1878
1879 static int netlink_macfdb_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
1880 ns_id_t ns_id, int startup)
1881 {
1882 int len;
1883 struct ndmsg *ndm;
1884
1885 if (h->nlmsg_type != RTM_NEWNEIGH)
1886 return 0;
1887
1888 /* Length validity. */
1889 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
1890 if (len < 0)
1891 return -1;
1892
1893 /* We are interested only in AF_BRIDGE notifications. */
1894 ndm = NLMSG_DATA(h);
1895 if (ndm->ndm_family != AF_BRIDGE)
1896 return 0;
1897
1898 return netlink_macfdb_change(snl, h, len);
1899 }
1900
1901 /* Request for MAC FDB information from the kernel */
1902 static int netlink_request_macs(struct zebra_ns *zns, int family, int type,
1903 ifindex_t master_ifindex)
1904 {
1905 struct {
1906 struct nlmsghdr n;
1907 struct ifinfomsg ifm;
1908 char buf[256];
1909 } req;
1910
1911 /* Form the request, specifying filter (rtattr) if needed. */
1912 memset(&req, 0, sizeof(req));
1913 req.n.nlmsg_type = type;
1914 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1915 req.ifm.ifi_family = family;
1916 if (master_ifindex)
1917 addattr32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
1918
1919 return netlink_request(&zns->netlink_cmd, &req.n);
1920 }
1921
1922 /*
1923 * MAC forwarding database read using netlink interface. This is invoked
1924 * at startup.
1925 */
1926 int netlink_macfdb_read(struct zebra_ns *zns)
1927 {
1928 int ret;
1929
1930 /* Get bridge FDB table. */
1931 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH, 0);
1932 if (ret < 0)
1933 return ret;
1934 /* We are reading entire table. */
1935 filter_vlan = 0;
1936 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1937 0, 1);
1938
1939 return ret;
1940 }
1941
1942 /*
1943 * MAC forwarding database read using netlink interface. This is for a
1944 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
1945 */
1946 int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
1947 struct interface *br_if)
1948 {
1949 struct zebra_if *br_zif;
1950 struct zebra_if *zif;
1951 struct zebra_l2info_vxlan *vxl;
1952 int ret = 0;
1953
1954
1955 /* Save VLAN we're filtering on, if needed. */
1956 br_zif = (struct zebra_if *)br_if->info;
1957 zif = (struct zebra_if *)ifp->info;
1958 vxl = &zif->l2info.vxl;
1959 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
1960 filter_vlan = vxl->access_vlan;
1961
1962 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
1963 */
1964 ret = netlink_request_macs(zns, AF_BRIDGE, RTM_GETNEIGH,
1965 br_if->ifindex);
1966 if (ret < 0)
1967 return ret;
1968 ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, zns,
1969 0, 0);
1970
1971 /* Reset VLAN filter. */
1972 filter_vlan = 0;
1973 return ret;
1974 }
1975
1976 static int netlink_macfdb_update(struct interface *ifp, vlanid_t vid,
1977 struct ethaddr *mac, struct in_addr vtep_ip,
1978 int local, int cmd, u_char sticky)
1979 {
1980 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
1981 struct {
1982 struct nlmsghdr n;
1983 struct ndmsg ndm;
1984 char buf[256];
1985 } req;
1986 int dst_alen;
1987 struct zebra_if *zif;
1988 struct interface *br_if;
1989 struct zebra_if *br_zif;
1990 char buf[ETHER_ADDR_STRLEN];
1991 int vid_present = 0, dst_present = 0;
1992 char vid_buf[20];
1993 char dst_buf[30];
1994
1995 zif = ifp->info;
1996 if ((br_if = zif->brslave_info.br_if) == NULL) {
1997 zlog_warn("MAC %s on IF %s(%u) - no mapping to bridge",
1998 (cmd == RTM_NEWNEIGH) ? "add" : "del", ifp->name,
1999 ifp->ifindex);
2000 return -1;
2001 }
2002
2003 memset(&req.n, 0, sizeof(req.n));
2004 memset(&req.ndm, 0, sizeof(req.ndm));
2005
2006 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2007 req.n.nlmsg_flags = NLM_F_REQUEST;
2008 if (cmd == RTM_NEWNEIGH)
2009 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2010 req.n.nlmsg_type = cmd;
2011 req.ndm.ndm_family = AF_BRIDGE;
2012 req.ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
2013 req.ndm.ndm_state = NUD_REACHABLE;
2014
2015 if (sticky)
2016 req.ndm.ndm_state |= NUD_NOARP;
2017 else
2018 req.ndm.ndm_flags |= NTF_EXT_LEARNED;
2019
2020 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2021 req.ndm.ndm_ifindex = ifp->ifindex;
2022 if (!local) {
2023 dst_alen = 4; // TODO: hardcoded
2024 addattr_l(&req.n, sizeof(req), NDA_DST, &vtep_ip, dst_alen);
2025 dst_present = 1;
2026 sprintf(dst_buf, " dst %s", inet_ntoa(vtep_ip));
2027 }
2028 br_zif = (struct zebra_if *)br_if->info;
2029 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) {
2030 addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
2031 vid_present = 1;
2032 sprintf(vid_buf, " VLAN %u", vid);
2033 }
2034 addattr32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex);
2035
2036 if (IS_ZEBRA_DEBUG_KERNEL)
2037 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
2038 nl_msg_type_to_str(cmd),
2039 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2040 ifp->ifindex, vid_present ? vid_buf : "",
2041 sticky ? "sticky " : "",
2042 prefix_mac2str(mac, buf, sizeof(buf)),
2043 dst_present ? dst_buf : "");
2044
2045 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2046 0);
2047 }
2048
/* Neighbor states for which an entry is processed as an add/update
 * rather than as a delete.
 */
#define NUD_VALID                                                              \
	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE     \
	 | NUD_DELAY)
2052
2053 static int netlink_ipneigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2054 int len)
2055 {
2056 struct ndmsg *ndm;
2057 struct interface *ifp;
2058 struct zebra_if *zif;
2059 struct zebra_vrf *zvrf;
2060 struct rtattr *tb[NDA_MAX + 1];
2061 struct interface *link_if;
2062 struct ethaddr mac;
2063 struct ipaddr ip;
2064 char buf[ETHER_ADDR_STRLEN];
2065 char buf2[INET6_ADDRSTRLEN];
2066 int mac_present = 0;
2067 u_char ext_learned;
2068
2069 ndm = NLMSG_DATA(h);
2070
2071 /* The interface should exist. */
2072 ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2073 ndm->ndm_ifindex);
2074 if (!ifp)
2075 return 0;
2076
2077 /* Locate VRF corresponding to interface. We only process neigh
2078 * notifications
2079 * if EVPN is enabled on this VRF.
2080 */
2081 zvrf = vrf_info_lookup(ifp->vrf_id);
2082 if (!zvrf || !EVPN_ENABLED(zvrf))
2083 return 0;
2084 if (!ifp->info)
2085 return 0;
2086
2087 /* Drop "permanent" entries. */
2088 if (ndm->ndm_state & NUD_PERMANENT)
2089 return 0;
2090
2091 zif = (struct zebra_if *)ifp->info;
2092 /* The neighbor is present on an SVI. From this, we locate the
2093 * underlying
2094 * bridge because we're only interested in neighbors on a VxLAN bridge.
2095 * The bridge is located based on the nature of the SVI:
2096 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2097 * interface
2098 * and is linked to the bridge
2099 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2100 * inteface
2101 * itself
2102 */
2103 if (IS_ZEBRA_IF_VLAN(ifp)) {
2104 link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
2105 zif->link_ifindex);
2106 if (!link_if)
2107 return 0;
2108 } else if (IS_ZEBRA_IF_BRIDGE(ifp))
2109 link_if = ifp;
2110 else
2111 return 0;
2112
2113 /* Parse attributes and extract fields of interest. */
2114 memset(tb, 0, sizeof tb);
2115 netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
2116
2117 if (!tb[NDA_DST]) {
2118 zlog_warn("%s family %s IF %s(%u) - no DST",
2119 nl_msg_type_to_str(h->nlmsg_type),
2120 nl_family_to_str(ndm->ndm_family), ifp->name,
2121 ndm->ndm_ifindex);
2122 return 0;
2123 }
2124 memset(&mac, 0, sizeof(struct ethaddr));
2125 memset(&ip, 0, sizeof(struct ipaddr));
2126 ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
2127 memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
2128
2129 if (h->nlmsg_type == RTM_NEWNEIGH) {
2130 if (tb[NDA_LLADDR]) {
2131 if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
2132 zlog_warn(
2133 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
2134 nl_msg_type_to_str(h->nlmsg_type),
2135 nl_family_to_str(ndm->ndm_family),
2136 ifp->name, ndm->ndm_ifindex,
2137 (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
2138 return 0;
2139 }
2140
2141 mac_present = 1;
2142 memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
2143 }
2144
2145 ext_learned = (ndm->ndm_flags & NTF_EXT_LEARNED) ? 1 : 0;
2146
2147 if (IS_ZEBRA_DEBUG_KERNEL)
2148 zlog_debug(
2149 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2150 nl_msg_type_to_str(h->nlmsg_type),
2151 nl_family_to_str(ndm->ndm_family), ifp->name,
2152 ndm->ndm_ifindex,
2153 ipaddr2str(&ip, buf2, sizeof(buf2)),
2154 mac_present
2155 ? prefix_mac2str(&mac, buf, sizeof(buf))
2156 : "",
2157 ndm->ndm_state, ndm->ndm_flags);
2158
2159 /* If the neighbor state is valid for use, process as an add or
2160 * update
2161 * else process as a delete. Note that the delete handling may
2162 * result
2163 * in re-adding the neighbor if it is a valid "remote" neighbor.
2164 */
2165 if (ndm->ndm_state & NUD_VALID)
2166 return zebra_vxlan_local_neigh_add_update(
2167 ifp, link_if, &ip, &mac, ndm->ndm_state,
2168 ext_learned);
2169
2170 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2171 }
2172
2173 if (IS_ZEBRA_DEBUG_KERNEL)
2174 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2175 nl_msg_type_to_str(h->nlmsg_type),
2176 nl_family_to_str(ndm->ndm_family), ifp->name,
2177 ndm->ndm_ifindex,
2178 ipaddr2str(&ip, buf2, sizeof(buf2)));
2179
2180 /* Process the delete - it may result in re-adding the neighbor if it is
2181 * a valid "remote" neighbor.
2182 */
2183 return zebra_vxlan_local_neigh_del(ifp, link_if, &ip);
2184 }
2185
2186 static int netlink_neigh_table(struct sockaddr_nl *snl, struct nlmsghdr *h,
2187 ns_id_t ns_id, int startup)
2188 {
2189 int len;
2190 struct ndmsg *ndm;
2191
2192 if (h->nlmsg_type != RTM_NEWNEIGH)
2193 return 0;
2194
2195 /* Length validity. */
2196 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2197 if (len < 0)
2198 return -1;
2199
2200 /* We are interested only in AF_INET or AF_INET6 notifications. */
2201 ndm = NLMSG_DATA(h);
2202 if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
2203 return 0;
2204
2205 return netlink_neigh_change(snl, h, len);
2206 }
2207
2208 /* Request for IP neighbor information from the kernel */
2209 static int netlink_request_neigh(struct zebra_ns *zns, int family, int type,
2210 ifindex_t ifindex)
2211 {
2212 struct {
2213 struct nlmsghdr n;
2214 struct ndmsg ndm;
2215 char buf[256];
2216 } req;
2217
2218 /* Form the request, specifying filter (rtattr) if needed. */
2219 memset(&req, 0, sizeof(req));
2220 req.n.nlmsg_type = type;
2221 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2222 req.ndm.ndm_family = family;
2223 if (ifindex)
2224 addattr32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
2225
2226 return netlink_request(&zns->netlink_cmd, &req.n);
2227 }
2228
2229 /*
2230 * IP Neighbor table read using netlink interface. This is invoked
2231 * at startup.
2232 */
2233 int netlink_neigh_read(struct zebra_ns *zns)
2234 {
2235 int ret;
2236
2237 /* Get IP neighbor table. */
2238 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH, 0);
2239 if (ret < 0)
2240 return ret;
2241 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2242 1);
2243
2244 return ret;
2245 }
2246
2247 /*
2248 * IP Neighbor table read using netlink interface. This is for a specific
2249 * VLAN device.
2250 */
2251 int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
2252 {
2253 int ret = 0;
2254
2255 ret = netlink_request_neigh(zns, AF_UNSPEC, RTM_GETNEIGH,
2256 vlan_if->ifindex);
2257 if (ret < 0)
2258 return ret;
2259 ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, zns, 0,
2260 0);
2261
2262 return ret;
2263 }
2264
2265 int netlink_neigh_change(struct sockaddr_nl *snl, struct nlmsghdr *h,
2266 ns_id_t ns_id)
2267 {
2268 int len;
2269 struct ndmsg *ndm;
2270
2271 if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH))
2272 return 0;
2273
2274 /* Length validity. */
2275 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
2276 if (len < 0)
2277 return -1;
2278
2279 /* Is this a notification for the MAC FDB or IP neighbor table? */
2280 ndm = NLMSG_DATA(h);
2281 if (ndm->ndm_family == AF_BRIDGE)
2282 return netlink_macfdb_change(snl, h, len);
2283
2284 if (ndm->ndm_type != RTN_UNICAST)
2285 return 0;
2286
2287 if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
2288 return netlink_ipneigh_change(snl, h, len);
2289
2290 return 0;
2291 }
2292
2293 static int netlink_neigh_update2(struct interface *ifp, struct ipaddr *ip,
2294 struct ethaddr *mac, u_int32_t flags, int cmd)
2295 {
2296 struct {
2297 struct nlmsghdr n;
2298 struct ndmsg ndm;
2299 char buf[256];
2300 } req;
2301 int ipa_len;
2302
2303 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2304 char buf[INET6_ADDRSTRLEN];
2305 char buf2[ETHER_ADDR_STRLEN];
2306
2307 memset(&req.n, 0, sizeof(req.n));
2308 memset(&req.ndm, 0, sizeof(req.ndm));
2309
2310 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
2311 req.n.nlmsg_flags = NLM_F_REQUEST;
2312 if (cmd == RTM_NEWNEIGH)
2313 req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
2314 req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
2315 req.ndm.ndm_family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
2316 req.ndm.ndm_state = flags;
2317 req.ndm.ndm_ifindex = ifp->ifindex;
2318 req.ndm.ndm_type = RTN_UNICAST;
2319 req.ndm.ndm_flags = NTF_EXT_LEARNED;
2320
2321
2322 ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
2323 addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
2324 if (mac)
2325 addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
2326
2327 if (IS_ZEBRA_DEBUG_KERNEL)
2328 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s",
2329 nl_msg_type_to_str(cmd),
2330 nl_family_to_str(req.ndm.ndm_family), ifp->name,
2331 ifp->ifindex, ipaddr2str(ip, buf, sizeof(buf)),
2332 mac ? prefix_mac2str(mac, buf2, sizeof(buf2))
2333 : "null");
2334
2335 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2336 0);
2337 }
2338
2339 int kernel_add_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2340 struct in_addr vtep_ip, u_char sticky)
2341 {
2342 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, 0, RTM_NEWNEIGH,
2343 sticky);
2344 }
2345
2346 int kernel_del_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
2347 struct in_addr vtep_ip, int local)
2348 {
2349 return netlink_macfdb_update(ifp, vid, mac, vtep_ip, local,
2350 RTM_DELNEIGH, 0);
2351 }
2352
/*
 * Add (or replace) a neighbor entry for 'ip' with link-layer address
 * 'mac' on 'ifp'. The request is an RTM_NEWNEIGH carrying state
 * NUD_REACHABLE. Returns the result of netlink_neigh_update2().
 */
int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
		     struct ethaddr *mac)
{
	int rc = netlink_neigh_update2(ifp, ip, mac, NUD_REACHABLE,
				       RTM_NEWNEIGH);

	return rc;
}
2358
/*
 * Delete the neighbor entry for 'ip' on 'ifp'. The request is an
 * RTM_DELNEIGH with no link-layer address and a state of 0. Returns the
 * result of netlink_neigh_update2().
 */
int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip)
{
	int rc = netlink_neigh_update2(ifp, ip, NULL, 0, RTM_DELNEIGH);

	return rc;
}
2363
2364 /*
2365 * MPLS label forwarding table change via netlink interface.
2366 */
2367 int netlink_mpls_multipath(int cmd, zebra_lsp_t *lsp)
2368 {
2369 mpls_lse_t lse;
2370 zebra_nhlfe_t *nhlfe;
2371 struct nexthop *nexthop = NULL;
2372 unsigned int nexthop_num;
2373 const char *routedesc;
2374 struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);
2375 int route_type;
2376
2377 struct {
2378 struct nlmsghdr n;
2379 struct rtmsg r;
2380 char buf[NL_PKT_BUF_SIZE];
2381 } req;
2382
2383 memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
2384
2385
2386 /*
2387 * Count # nexthops so we can decide whether to use singlepath
2388 * or multipath case.
2389 */
2390 nexthop_num = 0;
2391 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2392 nexthop = nhlfe->nexthop;
2393 if (!nexthop)
2394 continue;
2395 if (cmd == RTM_NEWROUTE) {
2396 /* Count all selected NHLFEs */
2397 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2398 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2399 nexthop_num++;
2400 } else /* DEL */
2401 {
2402 /* Count all installed NHLFEs */
2403 if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
2404 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
2405 nexthop_num++;
2406 }
2407 }
2408
2409 if (nexthop_num == 0 || !lsp->best_nhlfe) // unexpected
2410 return 0;
2411
2412 route_type = re_type_from_lsp_type(lsp->best_nhlfe->type);
2413
2414 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2415 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2416 req.n.nlmsg_type = cmd;
2417 req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2418
2419 req.r.rtm_family = AF_MPLS;
2420 req.r.rtm_table = RT_TABLE_MAIN;
2421 req.r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
2422 req.r.rtm_protocol = zebra2proto(route_type);
2423 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
2424 req.r.rtm_type = RTN_UNICAST;
2425
2426 if (cmd == RTM_NEWROUTE)
2427 /* We do a replace to handle update. */
2428 req.n.nlmsg_flags |= NLM_F_REPLACE;
2429
2430 /* Fill destination */
2431 lse = mpls_lse_encode(lsp->ile.in_label, 0, 0, 1);
2432 addattr_l(&req.n, sizeof req, RTA_DST, &lse, sizeof(mpls_lse_t));
2433
2434 /* Fill nexthops (paths) based on single-path or multipath. The paths
2435 * chosen depend on the operation.
2436 */
2437 if (nexthop_num == 1 || multipath_num == 1) {
2438 routedesc = "single-path";
2439 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2440
2441 nexthop_num = 0;
2442 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2443 nexthop = nhlfe->nexthop;
2444 if (!nexthop)
2445 continue;
2446
2447 if ((cmd == RTM_NEWROUTE
2448 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2449 && CHECK_FLAG(nexthop->flags,
2450 NEXTHOP_FLAG_ACTIVE)))
2451 || (cmd == RTM_DELROUTE
2452 && (CHECK_FLAG(nhlfe->flags,
2453 NHLFE_FLAG_INSTALLED)
2454 && CHECK_FLAG(nexthop->flags,
2455 NEXTHOP_FLAG_FIB)))) {
2456 /* Add the gateway */
2457 _netlink_mpls_build_singlepath(routedesc, nhlfe,
2458 &req.n, &req.r,
2459 sizeof req, cmd);
2460 if (cmd == RTM_NEWROUTE) {
2461 SET_FLAG(nhlfe->flags,
2462 NHLFE_FLAG_INSTALLED);
2463 SET_FLAG(nexthop->flags,
2464 NEXTHOP_FLAG_FIB);
2465 } else {
2466 UNSET_FLAG(nhlfe->flags,
2467 NHLFE_FLAG_INSTALLED);
2468 UNSET_FLAG(nexthop->flags,
2469 NEXTHOP_FLAG_FIB);
2470 }
2471 nexthop_num++;
2472 break;
2473 }
2474 }
2475 } else /* Multipath case */
2476 {
2477 char buf[NL_PKT_BUF_SIZE];
2478 struct rtattr *rta = (void *)buf;
2479 struct rtnexthop *rtnh;
2480 union g_addr *src1 = NULL;
2481
2482 rta->rta_type = RTA_MULTIPATH;
2483 rta->rta_len = RTA_LENGTH(0);
2484 rtnh = RTA_DATA(rta);
2485
2486 routedesc = "multipath";
2487 _netlink_mpls_debug(cmd, lsp->ile.in_label, routedesc);
2488
2489 nexthop_num = 0;
2490 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2491 nexthop = nhlfe->nexthop;
2492 if (!nexthop)
2493 continue;
2494
2495 if (nexthop_num >= multipath_num)
2496 break;
2497
2498 if ((cmd == RTM_NEWROUTE
2499 && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
2500 && CHECK_FLAG(nexthop->flags,
2501 NEXTHOP_FLAG_ACTIVE)))
2502 || (cmd == RTM_DELROUTE
2503 && (CHECK_FLAG(nhlfe->flags,
2504 NHLFE_FLAG_INSTALLED)
2505 && CHECK_FLAG(nexthop->flags,
2506 NEXTHOP_FLAG_FIB)))) {
2507 nexthop_num++;
2508
2509 /* Build the multipath */
2510 _netlink_mpls_build_multipath(routedesc, nhlfe,
2511 rta, rtnh, &req.r,
2512 &src1);
2513 rtnh = RTNH_NEXT(rtnh);
2514
2515 if (cmd == RTM_NEWROUTE) {
2516 SET_FLAG(nhlfe->flags,
2517 NHLFE_FLAG_INSTALLED);
2518 SET_FLAG(nexthop->flags,
2519 NEXTHOP_FLAG_FIB);
2520 } else {
2521 UNSET_FLAG(nhlfe->flags,
2522 NHLFE_FLAG_INSTALLED);
2523 UNSET_FLAG(nexthop->flags,
2524 NEXTHOP_FLAG_FIB);
2525 }
2526 }
2527 }
2528
2529 /* Add the multipath */
2530 if (rta->rta_len > RTA_LENGTH(0))
2531 addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
2532 RTA_DATA(rta), RTA_PAYLOAD(rta));
2533 }
2534
2535 /* Talk to netlink socket. */
2536 return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
2537 0);
2538 }
2539
2540 /*
2541 * Handle failure in LSP install, clear flags for NHLFE.
2542 */
2543 void clear_nhlfe_installed(zebra_lsp_t *lsp)
2544 {
2545 zebra_nhlfe_t *nhlfe;
2546 struct nexthop *nexthop;
2547
2548 for (nhlfe = lsp->nhlfe_list; nhlfe; nhlfe = nhlfe->next) {
2549 nexthop = nhlfe->nexthop;
2550 if (!nexthop)
2551 continue;
2552
2553 UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED);
2554 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
2555 }
2556 }
2557
2558 #endif /* HAVE_NETLINK */