1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <net/if_arp.h>
26 #include <linux/lwtunnel.h>
27 #include <linux/mpls_iptunnel.h>
28 #include <linux/neighbour.h>
29 #include <linux/rtnetlink.h>
31 /* Hack for GNU libc version 2. */
33 #define MSG_TRUNC 0x20
34 #endif /* MSG_TRUNC */
40 #include "connected.h"
43 #include "zebra_memory.h"
53 #include "zebra/zserv.h"
54 #include "zebra/zebra_ns.h"
55 #include "zebra/zebra_vrf.h"
57 #include "zebra/redistribute.h"
58 #include "zebra/interface.h"
59 #include "zebra/debug.h"
60 #include "zebra/rtadv.h"
61 #include "zebra/zebra_ptm.h"
62 #include "zebra/zebra_mpls.h"
63 #include "zebra/kernel_netlink.h"
64 #include "zebra/rt_netlink.h"
65 #include "zebra/zebra_mroute.h"
66 #include "zebra/zebra_vxlan.h"
72 static vlanid_t filter_vlan
= 0;
80 char ipv4_ll_buf
[16] = "169.254.0.1";
81 struct in_addr ipv4_ll
;
84 * The ipv4_ll data structure is used for all 5549
85 * additions to the kernel. Let's figure out the
86 * correct value one time instead for every
87 * install/remove of a 5549 type route
89 void rt_netlink_init(void)
91 inet_pton(AF_INET
, ipv4_ll_buf
, &ipv4_ll
);
94 static inline int is_selfroute(int proto
)
96 if ((proto
== RTPROT_BGP
) || (proto
== RTPROT_OSPF
)
97 || (proto
== RTPROT_STATIC
) || (proto
== RTPROT_ZEBRA
)
98 || (proto
== RTPROT_ISIS
) || (proto
== RTPROT_RIPNG
)
99 || (proto
== RTPROT_NHRP
) || (proto
== RTPROT_EIGRP
)
100 || (proto
== RTPROT_LDP
) || (proto
== RTPROT_BABEL
)
101 || (proto
== RTPROT_RIP
)) {
108 static inline int zebra2proto(int proto
)
111 case ZEBRA_ROUTE_BABEL
:
112 proto
= RTPROT_BABEL
;
114 case ZEBRA_ROUTE_BGP
:
117 case ZEBRA_ROUTE_OSPF
:
118 case ZEBRA_ROUTE_OSPF6
:
121 case ZEBRA_ROUTE_STATIC
:
122 proto
= RTPROT_STATIC
;
124 case ZEBRA_ROUTE_ISIS
:
127 case ZEBRA_ROUTE_RIP
:
130 case ZEBRA_ROUTE_RIPNG
:
131 proto
= RTPROT_RIPNG
;
133 case ZEBRA_ROUTE_NHRP
:
136 case ZEBRA_ROUTE_EIGRP
:
137 proto
= RTPROT_EIGRP
;
139 case ZEBRA_ROUTE_LDP
:
143 proto
= RTPROT_ZEBRA
;
150 static inline int proto2zebra(int proto
, int family
)
154 proto
= ZEBRA_ROUTE_BABEL
;
157 proto
= ZEBRA_ROUTE_BGP
;
160 proto
= (family
== AFI_IP
) ?
161 ZEBRA_ROUTE_OSPF
: ZEBRA_ROUTE_OSPF6
;
164 proto
= ZEBRA_ROUTE_ISIS
;
167 proto
= ZEBRA_ROUTE_RIP
;
170 proto
= ZEBRA_ROUTE_RIPNG
;
173 proto
= ZEBRA_ROUTE_NHRP
;
176 proto
= ZEBRA_ROUTE_EIGRP
;
179 proto
= ZEBRA_ROUTE_LDP
;
182 proto
= ZEBRA_ROUTE_STATIC
;
185 proto
= ZEBRA_ROUTE_KERNEL
;
192 Pending: create an efficient table_id (in a tree/hash) based lookup)
194 static vrf_id_t
vrf_lookup_by_table(u_int32_t table_id
)
197 struct zebra_vrf
*zvrf
;
199 RB_FOREACH (vrf
, vrf_id_head
, &vrfs_by_id
) {
200 if ((zvrf
= vrf
->info
) == NULL
|| (zvrf
->table_id
!= table_id
))
203 return zvrf_id(zvrf
);
209 /* Looking up routing table by netlink interface. */
210 static int netlink_route_change_read_unicast(struct sockaddr_nl
*snl
,
211 struct nlmsghdr
*h
, ns_id_t ns_id
,
216 struct rtattr
*tb
[RTA_MAX
+ 1];
219 struct prefix_ipv6 src_p
= {};
220 vrf_id_t vrf_id
= VRF_DEFAULT
;
222 char anyaddr
[16] = {0};
224 int proto
= ZEBRA_ROUTE_KERNEL
;
229 uint8_t distance
= 0;
233 void *prefsrc
= NULL
; /* IPv4 preferred source host address */
234 void *src
= NULL
; /* IPv6 srcdest source prefix */
235 enum blackhole_type bh_type
= BLACKHOLE_UNSPEC
;
239 if (startup
&& h
->nlmsg_type
!= RTM_NEWROUTE
)
241 switch (rtm
->rtm_type
) {
245 bh_type
= BLACKHOLE_NULL
;
247 case RTN_UNREACHABLE
:
248 bh_type
= BLACKHOLE_REJECT
;
251 bh_type
= BLACKHOLE_ADMINPROHIB
;
257 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct rtmsg
));
261 memset(tb
, 0, sizeof tb
);
262 netlink_parse_rtattr(tb
, RTA_MAX
, RTM_RTA(rtm
), len
);
264 if (rtm
->rtm_flags
& RTM_F_CLONED
)
266 if (rtm
->rtm_protocol
== RTPROT_REDIRECT
)
268 if (rtm
->rtm_protocol
== RTPROT_KERNEL
)
271 if (!startup
&& is_selfroute(rtm
->rtm_protocol
)
272 && h
->nlmsg_type
== RTM_NEWROUTE
)
275 /* We don't care about change notifications for the MPLS table. */
276 /* TODO: Revisit this. */
277 if (rtm
->rtm_family
== AF_MPLS
)
280 /* Table corresponding to route. */
282 table
= *(int *)RTA_DATA(tb
[RTA_TABLE
]);
284 table
= rtm
->rtm_table
;
287 vrf_id
= vrf_lookup_by_table(table
);
288 if (vrf_id
== VRF_DEFAULT
) {
289 if (!is_zebra_valid_kernel_table(table
)
290 && !is_zebra_main_routing_table(table
))
294 /* Route which inserted by Zebra. */
295 if (is_selfroute(rtm
->rtm_protocol
)) {
296 flags
|= ZEBRA_FLAG_SELFROUTE
;
297 proto
= proto2zebra(rtm
->rtm_protocol
, rtm
->rtm_family
);
300 index
= *(int *)RTA_DATA(tb
[RTA_OIF
]);
303 dest
= RTA_DATA(tb
[RTA_DST
]);
308 src
= RTA_DATA(tb
[RTA_SRC
]);
313 prefsrc
= RTA_DATA(tb
[RTA_PREFSRC
]);
316 gate
= RTA_DATA(tb
[RTA_GATEWAY
]);
318 if (tb
[RTA_PRIORITY
])
319 metric
= *(int *)RTA_DATA(tb
[RTA_PRIORITY
]);
321 if (tb
[RTA_METRICS
]) {
322 struct rtattr
*mxrta
[RTAX_MAX
+ 1];
324 memset(mxrta
, 0, sizeof mxrta
);
325 netlink_parse_rtattr(mxrta
, RTAX_MAX
,
326 RTA_DATA(tb
[RTA_METRICS
]),
327 RTA_PAYLOAD(tb
[RTA_METRICS
]));
330 mtu
= *(u_int32_t
*)RTA_DATA(mxrta
[RTAX_MTU
]);
333 if (rtm
->rtm_family
== AF_INET
) {
335 memcpy(&p
.u
.prefix4
, dest
, 4);
336 p
.prefixlen
= rtm
->rtm_dst_len
;
339 0; // Forces debug below to not display anything
340 } else if (rtm
->rtm_family
== AF_INET6
) {
342 memcpy(&p
.u
.prefix6
, dest
, 16);
343 p
.prefixlen
= rtm
->rtm_dst_len
;
345 src_p
.family
= AF_INET6
;
346 memcpy(&src_p
.prefix
, src
, 16);
347 src_p
.prefixlen
= rtm
->rtm_src_len
;
350 if (rtm
->rtm_src_len
!= 0) {
351 char buf
[PREFIX_STRLEN
];
353 "unsupported IPv[4|6] sourcedest route (dest %s vrf %u)",
354 prefix2str(&p
, buf
, sizeof(buf
)), vrf_id
);
359 * For ZEBRA_ROUTE_KERNEL types:
361 * The metric/priority of the route received from the kernel
362 * is a 32 bit number. We are going to interpret the high
363 * order byte as the Admin Distance and the low order 3 bytes
366 * This will allow us to do two things:
367 * 1) Allow the creation of kernel routes that can be
368 * overridden by zebra.
369 * 2) Allow the old behavior for 'most' kernel route types
370 * if a user enters 'ip route ...' v4 routes get a metric
371 * of 0 and v6 routes get a metric of 1024. Both of these
372 * values will end up with a admin distance of 0, which
373 * will cause them to win for the purposes of zebra.
375 if (proto
== ZEBRA_ROUTE_KERNEL
) {
376 distance
= (metric
>> 24) & 0xFF;
377 metric
= (metric
& 0x00FFFFFF);
380 if (IS_ZEBRA_DEBUG_KERNEL
) {
381 char buf
[PREFIX_STRLEN
];
382 char buf2
[PREFIX_STRLEN
];
384 "%s %s%s%s vrf %u metric: %d Admin Distance: %d", nl_msg_type_to_str(h
->nlmsg_type
),
385 prefix2str(&p
, buf
, sizeof(buf
)),
386 src_p
.prefixlen
? " from " : "",
387 src_p
.prefixlen
? prefix2str(&src_p
, buf2
, sizeof(buf2
))
389 vrf_id
, metric
, distance
);
393 if (rtm
->rtm_family
== AF_INET6
)
396 if (h
->nlmsg_type
== RTM_NEWROUTE
) {
397 if (!tb
[RTA_MULTIPATH
]) {
399 size_t sz
= (afi
== AFI_IP
) ? 4 : 16;
401 memset(&nh
, 0, sizeof(nh
));
403 if (bh_type
== BLACKHOLE_UNSPEC
) {
405 nh
.type
= NEXTHOP_TYPE_IFINDEX
;
406 else if (index
&& gate
)
407 nh
.type
= (afi
== AFI_IP
)
408 ? NEXTHOP_TYPE_IPV4_IFINDEX
409 : NEXTHOP_TYPE_IPV6_IFINDEX
;
410 else if (!index
&& gate
)
411 nh
.type
= (afi
== AFI_IP
)
415 nh
.type
= NEXTHOP_TYPE_BLACKHOLE
;
416 nh
.bh_type
= bh_type
;
419 nh
.type
= NEXTHOP_TYPE_BLACKHOLE
;
420 nh
.bh_type
= bh_type
;
424 memcpy(&nh
.src
, prefsrc
, sz
);
426 memcpy(&nh
.gate
, gate
, sz
);
428 rib_add(afi
, SAFI_UNICAST
, vrf_id
, proto
,
429 0, flags
, &p
, NULL
, &nh
, table
, metric
, mtu
, distance
);
431 /* This is a multipath route */
433 struct route_entry
*re
;
434 struct rtnexthop
*rtnh
=
435 (struct rtnexthop
*)RTA_DATA(tb
[RTA_MULTIPATH
]);
437 len
= RTA_PAYLOAD(tb
[RTA_MULTIPATH
]);
439 re
= XCALLOC(MTYPE_RE
, sizeof(struct route_entry
));
441 re
->distance
= distance
;
448 re
->uptime
= time(NULL
);
451 if (len
< (int)sizeof(*rtnh
)
452 || rtnh
->rtnh_len
> len
)
455 index
= rtnh
->rtnh_ifindex
;
457 if (rtnh
->rtnh_len
> sizeof(*rtnh
)) {
458 memset(tb
, 0, sizeof(tb
));
459 netlink_parse_rtattr(
460 tb
, RTA_MAX
, RTNH_DATA(rtnh
),
461 rtnh
->rtnh_len
- sizeof(*rtnh
));
468 if (rtm
->rtm_family
== AF_INET
) {
470 route_entry_nexthop_ipv4_ifindex_add(
474 route_entry_nexthop_ipv4_add(
477 } else if (rtm
->rtm_family
480 route_entry_nexthop_ipv6_ifindex_add(
484 route_entry_nexthop_ipv6_add(
488 route_entry_nexthop_ifindex_add(re
,
491 len
-= NLMSG_ALIGN(rtnh
->rtnh_len
);
492 rtnh
= RTNH_NEXT(rtnh
);
495 zserv_nexthop_num_warn(__func__
,
496 (const struct prefix
*)&p
,
498 if (re
->nexthop_num
== 0)
501 rib_add_multipath(afi
, SAFI_UNICAST
, &p
,
505 if (!tb
[RTA_MULTIPATH
]) {
507 size_t sz
= (afi
== AFI_IP
) ? 4 : 16;
509 memset(&nh
, 0, sizeof(nh
));
510 if (bh_type
== BLACKHOLE_UNSPEC
) {
512 nh
.type
= NEXTHOP_TYPE_IFINDEX
;
513 else if (index
&& gate
)
516 ? NEXTHOP_TYPE_IPV4_IFINDEX
517 : NEXTHOP_TYPE_IPV6_IFINDEX
;
518 else if (!index
&& gate
)
519 nh
.type
= (afi
== AFI_IP
)
523 nh
.type
= NEXTHOP_TYPE_BLACKHOLE
;
524 nh
.bh_type
= BLACKHOLE_UNSPEC
;
527 nh
.type
= NEXTHOP_TYPE_BLACKHOLE
;
528 nh
.bh_type
= bh_type
;
532 memcpy(&nh
.gate
, gate
, sz
);
533 rib_delete(afi
, SAFI_UNICAST
, vrf_id
,
534 proto
, 0, flags
, &p
, NULL
, &nh
,
535 table
, metric
, true);
537 /* XXX: need to compare the entire list of nexthops
538 * here for NLM_F_APPEND stupidity */
539 rib_delete(afi
, SAFI_UNICAST
, vrf_id
,
540 proto
, 0, flags
, &p
, NULL
, NULL
,
541 table
, metric
, true);
548 static struct mcast_route_data
*mroute
= NULL
;
550 static int netlink_route_change_read_multicast(struct sockaddr_nl
*snl
,
552 ns_id_t ns_id
, int startup
)
556 struct rtattr
*tb
[RTA_MAX
+ 1];
557 struct mcast_route_data
*m
;
558 struct mcast_route_data mr
;
565 char oif_list
[256] = "\0";
566 vrf_id_t vrf
= ns_id
;
572 memset(&mr
, 0, sizeof(mr
));
578 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct rtmsg
));
580 memset(tb
, 0, sizeof tb
);
581 netlink_parse_rtattr(tb
, RTA_MAX
, RTM_RTA(rtm
), len
);
584 table
= *(int *)RTA_DATA(tb
[RTA_TABLE
]);
586 table
= rtm
->rtm_table
;
588 vrf
= vrf_lookup_by_table(table
);
591 iif
= *(int *)RTA_DATA(tb
[RTA_IIF
]);
594 m
->sg
.src
= *(struct in_addr
*)RTA_DATA(tb
[RTA_SRC
]);
597 m
->sg
.grp
= *(struct in_addr
*)RTA_DATA(tb
[RTA_DST
]);
599 if ((RTA_EXPIRES
<= RTA_MAX
) && tb
[RTA_EXPIRES
])
600 m
->lastused
= *(unsigned long long *)RTA_DATA(tb
[RTA_EXPIRES
]);
602 if (tb
[RTA_MULTIPATH
]) {
603 struct rtnexthop
*rtnh
=
604 (struct rtnexthop
*)RTA_DATA(tb
[RTA_MULTIPATH
]);
606 len
= RTA_PAYLOAD(tb
[RTA_MULTIPATH
]);
608 if (len
< (int)sizeof(*rtnh
) || rtnh
->rtnh_len
> len
)
611 oif
[oif_count
] = rtnh
->rtnh_ifindex
;
614 len
-= NLMSG_ALIGN(rtnh
->rtnh_len
);
615 rtnh
= RTNH_NEXT(rtnh
);
619 if (IS_ZEBRA_DEBUG_KERNEL
) {
620 struct interface
*ifp
;
621 strlcpy(sbuf
, inet_ntoa(m
->sg
.src
), sizeof(sbuf
));
622 strlcpy(gbuf
, inet_ntoa(m
->sg
.grp
), sizeof(gbuf
));
623 for (count
= 0; count
< oif_count
; count
++) {
624 ifp
= if_lookup_by_index(oif
[count
], vrf
);
627 sprintf(temp
, "%s ", ifp
->name
);
628 strcat(oif_list
, temp
);
630 struct zebra_vrf
*zvrf
= zebra_vrf_lookup_by_id(vrf
);
631 ifp
= if_lookup_by_index(iif
, vrf
);
633 "MCAST VRF: %s(%d) %s (%s,%s) IIF: %s OIF: %s jiffies: %lld",
634 zvrf
->vrf
->name
, vrf
, nl_msg_type_to_str(h
->nlmsg_type
),
635 sbuf
, gbuf
, ifp
->name
, oif_list
, m
->lastused
);
640 int netlink_route_change(struct sockaddr_nl
*snl
, struct nlmsghdr
*h
,
641 ns_id_t ns_id
, int startup
)
644 vrf_id_t vrf_id
= ns_id
;
649 if (!(h
->nlmsg_type
== RTM_NEWROUTE
|| h
->nlmsg_type
== RTM_DELROUTE
)) {
650 /* If this is not route add/delete message print warning. */
651 zlog_warn("Kernel message: %d vrf %u\n", h
->nlmsg_type
, vrf_id
);
655 /* Connected route. */
656 if (IS_ZEBRA_DEBUG_KERNEL
)
657 zlog_debug("%s %s %s proto %s vrf %u",
658 nl_msg_type_to_str(h
->nlmsg_type
),
659 nl_family_to_str(rtm
->rtm_family
),
660 nl_rttype_to_str(rtm
->rtm_type
),
661 nl_rtproto_to_str(rtm
->rtm_protocol
), vrf_id
);
663 /* We don't care about change notifications for the MPLS table. */
664 /* TODO: Revisit this. */
665 if (rtm
->rtm_family
== AF_MPLS
)
668 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct rtmsg
));
672 if (rtm
->rtm_type
== RTN_MULTICAST
)
673 netlink_route_change_read_multicast(snl
, h
, ns_id
, startup
);
675 netlink_route_change_read_unicast(snl
, h
, ns_id
, startup
);
679 /* Request for specific route information from the kernel */
680 static int netlink_request_route(struct zebra_ns
*zns
, int family
, int type
)
687 /* Form the request, specifying filter (rtattr) if needed. */
688 memset(&req
, 0, sizeof(req
));
689 req
.n
.nlmsg_type
= type
;
690 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
691 req
.rtm
.rtm_family
= family
;
693 return netlink_request(&zns
->netlink_cmd
, &req
.n
);
696 /* Routing table read function using netlink interface. Only called
698 int netlink_route_read(struct zebra_ns
*zns
)
702 /* Get IPv4 routing table. */
703 ret
= netlink_request_route(zns
, AF_INET
, RTM_GETROUTE
);
706 ret
= netlink_parse_info(netlink_route_change_read_unicast
,
707 &zns
->netlink_cmd
, zns
, 0, 1);
711 /* Get IPv6 routing table. */
712 ret
= netlink_request_route(zns
, AF_INET6
, RTM_GETROUTE
);
715 ret
= netlink_parse_info(netlink_route_change_read_unicast
,
716 &zns
->netlink_cmd
, zns
, 0, 1);
723 static void _netlink_route_nl_add_gateway_info(u_char route_family
,
725 struct nlmsghdr
*nlmsg
,
726 size_t req_size
, int bytelen
,
727 struct nexthop
*nexthop
)
729 if (route_family
== AF_MPLS
) {
730 struct gw_family_t gw_fam
;
732 gw_fam
.family
= gw_family
;
733 if (gw_family
== AF_INET
)
734 memcpy(&gw_fam
.gate
.ipv4
, &nexthop
->gate
.ipv4
, bytelen
);
736 memcpy(&gw_fam
.gate
.ipv6
, &nexthop
->gate
.ipv6
, bytelen
);
737 addattr_l(nlmsg
, req_size
, RTA_VIA
, &gw_fam
.family
,
740 if (gw_family
== AF_INET
)
741 addattr_l(nlmsg
, req_size
, RTA_GATEWAY
,
742 &nexthop
->gate
.ipv4
, bytelen
);
744 addattr_l(nlmsg
, req_size
, RTA_GATEWAY
,
745 &nexthop
->gate
.ipv6
, bytelen
);
749 static void _netlink_route_rta_add_gateway_info(u_char route_family
,
752 struct rtnexthop
*rtnh
,
753 size_t req_size
, int bytelen
,
754 struct nexthop
*nexthop
)
756 if (route_family
== AF_MPLS
) {
757 struct gw_family_t gw_fam
;
759 gw_fam
.family
= gw_family
;
760 if (gw_family
== AF_INET
)
761 memcpy(&gw_fam
.gate
.ipv4
, &nexthop
->gate
.ipv4
, bytelen
);
763 memcpy(&gw_fam
.gate
.ipv6
, &nexthop
->gate
.ipv6
, bytelen
);
764 rta_addattr_l(rta
, req_size
, RTA_VIA
, &gw_fam
.family
,
766 rtnh
->rtnh_len
+= RTA_LENGTH(bytelen
+ 2);
768 if (gw_family
== AF_INET
)
769 rta_addattr_l(rta
, req_size
, RTA_GATEWAY
,
770 &nexthop
->gate
.ipv4
, bytelen
);
772 rta_addattr_l(rta
, req_size
, RTA_GATEWAY
,
773 &nexthop
->gate
.ipv6
, bytelen
);
774 rtnh
->rtnh_len
+= sizeof(struct rtattr
) + bytelen
;
778 /* This function takes a nexthop as argument and adds
779 * the appropriate netlink attributes to an existing
782 * @param routedesc: Human readable description of route type
783 * (direct/recursive, single-/multipath)
784 * @param bytelen: Length of addresses in bytes.
785 * @param nexthop: Nexthop information
786 * @param nlmsg: nlmsghdr structure to fill in.
787 * @param req_size: The size allocated for the message.
789 static void _netlink_route_build_singlepath(const char *routedesc
, int bytelen
,
790 struct nexthop
*nexthop
,
791 struct nlmsghdr
*nlmsg
,
793 size_t req_size
, int cmd
)
795 struct nexthop_label
*nh_label
;
796 mpls_lse_t out_lse
[MPLS_MAX_LABELS
];
800 * label_buf is *only* currently used within debugging.
801 * As such when we assign it we are guarding it inside
802 * a debug test. If you want to change this make sure
803 * you fix this assumption
806 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
807 * (in the case of LER)
809 nh_label
= nexthop
->nh_label
;
810 if (rtmsg
->rtm_family
== AF_MPLS
) {
812 assert(nh_label
->num_labels
== 1);
815 if (nh_label
&& nh_label
->num_labels
) {
816 int i
, num_labels
= 0;
820 for (i
= 0; i
< nh_label
->num_labels
; i
++) {
821 if (nh_label
->label
[i
] != MPLS_IMP_NULL_LABEL
) {
822 bos
= ((i
== (nh_label
->num_labels
- 1)) ? 1
824 out_lse
[i
] = mpls_lse_encode(nh_label
->label
[i
],
826 if (IS_ZEBRA_DEBUG_KERNEL
) {
828 sprintf(label_buf
, "label %u",
831 sprintf(label_buf1
, "/%u",
833 strlcat(label_buf
, label_buf1
,
841 if (rtmsg
->rtm_family
== AF_MPLS
)
842 addattr_l(nlmsg
, req_size
, RTA_NEWDST
, &out_lse
,
843 num_labels
* sizeof(mpls_lse_t
));
846 u_int16_t encap
= LWTUNNEL_ENCAP_MPLS
;
848 addattr_l(nlmsg
, req_size
, RTA_ENCAP_TYPE
,
849 &encap
, sizeof(u_int16_t
));
850 nest
= addattr_nest(nlmsg
, req_size
, RTA_ENCAP
);
851 addattr_l(nlmsg
, req_size
, MPLS_IPTUNNEL_DST
,
853 num_labels
* sizeof(mpls_lse_t
));
854 addattr_nest_end(nlmsg
, nest
);
859 if (CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_ONLINK
))
860 rtmsg
->rtm_flags
|= RTNH_F_ONLINK
;
862 if (rtmsg
->rtm_family
== AF_INET
863 && (nexthop
->type
== NEXTHOP_TYPE_IPV6
864 || nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
)) {
865 rtmsg
->rtm_flags
|= RTNH_F_ONLINK
;
866 addattr_l(nlmsg
, req_size
, RTA_GATEWAY
, &ipv4_ll
, 4);
867 addattr32(nlmsg
, req_size
, RTA_OIF
, nexthop
->ifindex
);
869 if (nexthop
->rmap_src
.ipv4
.s_addr
&& (cmd
== RTM_NEWROUTE
))
870 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
871 &nexthop
->rmap_src
.ipv4
, bytelen
);
872 else if (nexthop
->src
.ipv4
.s_addr
&& (cmd
== RTM_NEWROUTE
))
873 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
874 &nexthop
->src
.ipv4
, bytelen
);
876 if (IS_ZEBRA_DEBUG_KERNEL
)
878 " 5549: _netlink_route_build_singlepath() (%s): "
879 "nexthop via %s %s if %u",
880 routedesc
, ipv4_ll_buf
, label_buf
,
885 if (nexthop
->type
== NEXTHOP_TYPE_IPV4
886 || nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
) {
887 /* Send deletes to the kernel without specifying the next-hop */
888 if (cmd
!= RTM_DELROUTE
)
889 _netlink_route_nl_add_gateway_info(
890 rtmsg
->rtm_family
, AF_INET
, nlmsg
, req_size
,
893 if (cmd
== RTM_NEWROUTE
) {
894 if (nexthop
->rmap_src
.ipv4
.s_addr
)
895 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
896 &nexthop
->rmap_src
.ipv4
, bytelen
);
897 else if (nexthop
->src
.ipv4
.s_addr
)
898 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
899 &nexthop
->src
.ipv4
, bytelen
);
902 if (IS_ZEBRA_DEBUG_KERNEL
)
904 "netlink_route_multipath() (%s): "
905 "nexthop via %s %s if %u",
906 routedesc
, inet_ntoa(nexthop
->gate
.ipv4
),
907 label_buf
, nexthop
->ifindex
);
910 if (nexthop
->type
== NEXTHOP_TYPE_IPV6
911 || nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
912 _netlink_route_nl_add_gateway_info(rtmsg
->rtm_family
, AF_INET6
,
913 nlmsg
, req_size
, bytelen
,
916 if (cmd
== RTM_NEWROUTE
) {
917 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop
->rmap_src
.ipv6
))
918 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
919 &nexthop
->rmap_src
.ipv6
, bytelen
);
920 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop
->src
.ipv6
))
921 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
922 &nexthop
->src
.ipv6
, bytelen
);
925 if (IS_ZEBRA_DEBUG_KERNEL
)
927 "netlink_route_multipath() (%s): "
928 "nexthop via %s %s if %u",
929 routedesc
, inet6_ntoa(nexthop
->gate
.ipv6
),
930 label_buf
, nexthop
->ifindex
);
932 if (nexthop
->type
== NEXTHOP_TYPE_IFINDEX
933 || nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
) {
934 addattr32(nlmsg
, req_size
, RTA_OIF
, nexthop
->ifindex
);
936 if (cmd
== RTM_NEWROUTE
) {
937 if (nexthop
->rmap_src
.ipv4
.s_addr
)
938 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
939 &nexthop
->rmap_src
.ipv4
, bytelen
);
940 else if (nexthop
->src
.ipv4
.s_addr
)
941 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
942 &nexthop
->src
.ipv4
, bytelen
);
945 if (IS_ZEBRA_DEBUG_KERNEL
)
947 "netlink_route_multipath() (%s): "
949 routedesc
, nexthop
->ifindex
);
952 if (nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
953 addattr32(nlmsg
, req_size
, RTA_OIF
, nexthop
->ifindex
);
955 if (cmd
== RTM_NEWROUTE
) {
956 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop
->rmap_src
.ipv6
))
957 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
958 &nexthop
->rmap_src
.ipv6
, bytelen
);
959 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop
->src
.ipv6
))
960 addattr_l(nlmsg
, req_size
, RTA_PREFSRC
,
961 &nexthop
->src
.ipv6
, bytelen
);
964 if (IS_ZEBRA_DEBUG_KERNEL
)
966 "netlink_route_multipath() (%s): "
968 routedesc
, nexthop
->ifindex
);
972 /* This function takes a nexthop as argument and
973 * appends to the given rtattr/rtnexthop pair the
974 * representation of the nexthop. If the nexthop
975 * defines a preferred source, the src parameter
976 * will be modified to point to that src, otherwise
977 * it will be kept unmodified.
979 * @param routedesc: Human readable description of route type
980 * (direct/recursive, single-/multipath)
981 * @param bytelen: Length of addresses in bytes.
982 * @param nexthop: Nexthop information
983 * @param rta: rtnetlink attribute structure
984 * @param rtnh: pointer to an rtnetlink nexthop structure
985 * @param src: pointer pointing to a location where
986 * the prefsrc should be stored.
988 static void _netlink_route_build_multipath(const char *routedesc
, int bytelen
,
989 struct nexthop
*nexthop
,
991 struct rtnexthop
*rtnh
,
995 struct nexthop_label
*nh_label
;
996 mpls_lse_t out_lse
[MPLS_MAX_LABELS
];
999 rtnh
->rtnh_len
= sizeof(*rtnh
);
1000 rtnh
->rtnh_flags
= 0;
1001 rtnh
->rtnh_hops
= 0;
1002 rta
->rta_len
+= rtnh
->rtnh_len
;
1005 * label_buf is *only* currently used within debugging.
1006 * As such when we assign it we are guarding it inside
1007 * a debug test. If you want to change this make sure
1008 * you fix this assumption
1010 label_buf
[0] = '\0';
1011 /* outgoing label - either as NEWDST (in the case of LSR) or as ENCAP
1012 * (in the case of LER)
1014 nh_label
= nexthop
->nh_label
;
1015 if (rtmsg
->rtm_family
== AF_MPLS
) {
1017 assert(nh_label
->num_labels
== 1);
1020 if (nh_label
&& nh_label
->num_labels
) {
1021 int i
, num_labels
= 0;
1023 char label_buf1
[20];
1025 for (i
= 0; i
< nh_label
->num_labels
; i
++) {
1026 if (nh_label
->label
[i
] != MPLS_IMP_NULL_LABEL
) {
1027 bos
= ((i
== (nh_label
->num_labels
- 1)) ? 1
1029 out_lse
[i
] = mpls_lse_encode(nh_label
->label
[i
],
1031 if (IS_ZEBRA_DEBUG_KERNEL
) {
1033 sprintf(label_buf
, "label %u",
1034 nh_label
->label
[i
]);
1036 sprintf(label_buf1
, "/%u",
1037 nh_label
->label
[i
]);
1038 strlcat(label_buf
, label_buf1
,
1046 if (rtmsg
->rtm_family
== AF_MPLS
) {
1047 rta_addattr_l(rta
, NL_PKT_BUF_SIZE
, RTA_NEWDST
,
1049 num_labels
* sizeof(mpls_lse_t
));
1050 rtnh
->rtnh_len
+= RTA_LENGTH(
1051 num_labels
* sizeof(mpls_lse_t
));
1053 struct rtattr
*nest
;
1054 u_int16_t encap
= LWTUNNEL_ENCAP_MPLS
;
1055 int len
= rta
->rta_len
;
1057 rta_addattr_l(rta
, NL_PKT_BUF_SIZE
,
1058 RTA_ENCAP_TYPE
, &encap
,
1060 nest
= rta_nest(rta
, NL_PKT_BUF_SIZE
,
1062 rta_addattr_l(rta
, NL_PKT_BUF_SIZE
,
1063 MPLS_IPTUNNEL_DST
, &out_lse
,
1064 num_labels
* sizeof(mpls_lse_t
));
1065 rta_nest_end(rta
, nest
);
1066 rtnh
->rtnh_len
+= rta
->rta_len
- len
;
1071 if (CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_ONLINK
))
1072 rtnh
->rtnh_flags
|= RTNH_F_ONLINK
;
1074 if (rtmsg
->rtm_family
== AF_INET
1075 && (nexthop
->type
== NEXTHOP_TYPE_IPV6
1076 || nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
)) {
1078 rtnh
->rtnh_flags
|= RTNH_F_ONLINK
;
1079 rta_addattr_l(rta
, NL_PKT_BUF_SIZE
, RTA_GATEWAY
, &ipv4_ll
,
1081 rtnh
->rtnh_len
+= sizeof(struct rtattr
) + bytelen
;
1082 rtnh
->rtnh_ifindex
= nexthop
->ifindex
;
1084 if (nexthop
->rmap_src
.ipv4
.s_addr
)
1085 *src
= &nexthop
->rmap_src
;
1086 else if (nexthop
->src
.ipv4
.s_addr
)
1087 *src
= &nexthop
->src
;
1089 if (IS_ZEBRA_DEBUG_KERNEL
)
1091 " 5549: netlink_route_build_multipath() (%s): "
1092 "nexthop via %s %s if %u",
1093 routedesc
, ipv4_ll_buf
, label_buf
,
1098 if (nexthop
->type
== NEXTHOP_TYPE_IPV4
1099 || nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
) {
1100 _netlink_route_rta_add_gateway_info(rtmsg
->rtm_family
, AF_INET
,
1101 rta
, rtnh
, NL_PKT_BUF_SIZE
,
1103 if (nexthop
->rmap_src
.ipv4
.s_addr
)
1104 *src
= &nexthop
->rmap_src
;
1105 else if (nexthop
->src
.ipv4
.s_addr
)
1106 *src
= &nexthop
->src
;
1108 if (IS_ZEBRA_DEBUG_KERNEL
)
1110 "netlink_route_multipath() (%s): "
1111 "nexthop via %s %s if %u",
1112 routedesc
, inet_ntoa(nexthop
->gate
.ipv4
),
1113 label_buf
, nexthop
->ifindex
);
1115 if (nexthop
->type
== NEXTHOP_TYPE_IPV6
1116 || nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
1117 _netlink_route_rta_add_gateway_info(rtmsg
->rtm_family
, AF_INET6
,
1118 rta
, rtnh
, NL_PKT_BUF_SIZE
,
1121 if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop
->rmap_src
.ipv6
))
1122 *src
= &nexthop
->rmap_src
;
1123 else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop
->src
.ipv6
))
1124 *src
= &nexthop
->src
;
1126 if (IS_ZEBRA_DEBUG_KERNEL
)
1128 "netlink_route_multipath() (%s): "
1129 "nexthop via %s %s if %u",
1130 routedesc
, inet6_ntoa(nexthop
->gate
.ipv6
),
1131 label_buf
, nexthop
->ifindex
);
1134 if (nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
1135 || nexthop
->type
== NEXTHOP_TYPE_IFINDEX
) {
1136 rtnh
->rtnh_ifindex
= nexthop
->ifindex
;
1138 if (nexthop
->rmap_src
.ipv4
.s_addr
)
1139 *src
= &nexthop
->rmap_src
;
1140 else if (nexthop
->src
.ipv4
.s_addr
)
1141 *src
= &nexthop
->src
;
1143 if (IS_ZEBRA_DEBUG_KERNEL
)
1145 "netlink_route_multipath() (%s): "
1146 "nexthop via if %u",
1147 routedesc
, nexthop
->ifindex
);
1148 } else if (nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
1149 rtnh
->rtnh_ifindex
= nexthop
->ifindex
;
1151 if (IS_ZEBRA_DEBUG_KERNEL
)
1153 "netlink_route_multipath() (%s): "
1154 "nexthop via if %u",
1155 routedesc
, nexthop
->ifindex
);
1157 rtnh
->rtnh_ifindex
= 0;
1161 static inline void _netlink_mpls_build_singlepath(const char *routedesc
,
1162 zebra_nhlfe_t
*nhlfe
,
1163 struct nlmsghdr
*nlmsg
,
1164 struct rtmsg
*rtmsg
,
1165 size_t req_size
, int cmd
)
1170 family
= NHLFE_FAMILY(nhlfe
);
1171 bytelen
= (family
== AF_INET
? 4 : 16);
1172 _netlink_route_build_singlepath(routedesc
, bytelen
, nhlfe
->nexthop
,
1173 nlmsg
, rtmsg
, req_size
, cmd
);
1178 _netlink_mpls_build_multipath(const char *routedesc
, zebra_nhlfe_t
*nhlfe
,
1179 struct rtattr
*rta
, struct rtnexthop
*rtnh
,
1180 struct rtmsg
*rtmsg
, union g_addr
**src
)
1185 family
= NHLFE_FAMILY(nhlfe
);
1186 bytelen
= (family
== AF_INET
? 4 : 16);
1187 _netlink_route_build_multipath(routedesc
, bytelen
, nhlfe
->nexthop
, rta
,
1192 /* Log debug information for netlink_route_multipath
1193 * if debug logging is enabled.
1195 * @param cmd: Netlink command which is to be processed
1196 * @param p: Prefix for which the change is due
1197 * @param nexthop: Nexthop which is currently processed
1198 * @param routedesc: Semantic annotation for nexthop
1199 * (recursive, multipath, etc.)
1200 * @param family: Address family which the change concerns
1202 static void _netlink_route_debug(int cmd
, struct prefix
*p
,
1203 struct nexthop
*nexthop
, const char *routedesc
,
1204 int family
, struct zebra_vrf
*zvrf
)
1206 if (IS_ZEBRA_DEBUG_KERNEL
) {
1207 char buf
[PREFIX_STRLEN
];
1209 "netlink_route_multipath() (%s): %s %s vrf %u type %s",
1210 routedesc
, nl_msg_type_to_str(cmd
),
1211 prefix2str(p
, buf
, sizeof(buf
)), zvrf_id(zvrf
),
1212 (nexthop
) ? nexthop_type_to_str(nexthop
->type
) : "UNK");
1216 static void _netlink_mpls_debug(int cmd
, u_int32_t label
, const char *routedesc
)
1218 if (IS_ZEBRA_DEBUG_KERNEL
)
1219 zlog_debug("netlink_mpls_multipath() (%s): %s %u/20", routedesc
,
1220 nl_msg_type_to_str(cmd
), label
);
1223 static int netlink_neigh_update(int cmd
, int ifindex
, uint32_t addr
, char *lla
,
1232 struct zebra_ns
*zns
= zebra_ns_lookup(NS_DEFAULT
);
1234 memset(&req
.n
, 0, sizeof(req
.n
));
1235 memset(&req
.ndm
, 0, sizeof(req
.ndm
));
1237 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ndmsg
));
1238 req
.n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
1239 req
.n
.nlmsg_type
= cmd
; // RTM_NEWNEIGH or RTM_DELNEIGH
1240 req
.n
.nlmsg_pid
= zns
->netlink_cmd
.snl
.nl_pid
;
1242 req
.ndm
.ndm_family
= AF_INET
;
1243 req
.ndm
.ndm_state
= NUD_PERMANENT
;
1244 req
.ndm
.ndm_ifindex
= ifindex
;
1245 req
.ndm
.ndm_type
= RTN_UNICAST
;
1247 addattr_l(&req
.n
, sizeof(req
), NDA_DST
, &addr
, 4);
1248 addattr_l(&req
.n
, sizeof(req
), NDA_LLADDR
, lla
, llalen
);
1250 return netlink_talk(netlink_talk_filter
, &req
.n
, &zns
->netlink_cmd
, zns
,
1254 /* Routing table change via netlink interface. */
1255 /* Update flag indicates whether this is a "replace" or not. */
1256 static int netlink_route_multipath(int cmd
, struct prefix
*p
,
1257 struct prefix
*src_p
, struct route_entry
*re
,
1261 struct sockaddr_nl snl
;
1262 struct nexthop
*nexthop
= NULL
;
1263 unsigned int nexthop_num
;
1265 int family
= PREFIX_FAMILY(p
);
1266 const char *routedesc
;
1273 char buf
[NL_PKT_BUF_SIZE
];
1276 struct zebra_ns
*zns
= zebra_ns_lookup(NS_DEFAULT
);
1277 struct zebra_vrf
*zvrf
= vrf_info_lookup(re
->vrf_id
);
1279 memset(&req
, 0, sizeof req
- NL_PKT_BUF_SIZE
);
1281 bytelen
= (family
== AF_INET
? 4 : 16);
1283 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1284 req
.n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
1285 if ((cmd
== RTM_NEWROUTE
) && update
)
1286 req
.n
.nlmsg_flags
|= NLM_F_REPLACE
;
1287 req
.n
.nlmsg_type
= cmd
;
1288 req
.n
.nlmsg_pid
= zns
->netlink_cmd
.snl
.nl_pid
;
1290 req
.r
.rtm_family
= family
;
1291 req
.r
.rtm_dst_len
= p
->prefixlen
;
1292 req
.r
.rtm_src_len
= src_p
? src_p
->prefixlen
: 0;
1293 req
.r
.rtm_protocol
= zebra2proto(re
->type
);
1294 req
.r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1295 req
.r
.rtm_type
= RTN_UNICAST
;
1297 addattr_l(&req
.n
, sizeof req
, RTA_DST
, &p
->u
.prefix
, bytelen
);
1299 addattr_l(&req
.n
, sizeof req
, RTA_SRC
, &src_p
->u
.prefix
,
1303 /* Hardcode the metric for all routes coming from zebra. Metric isn't
1305 * either by the kernel or by zebra. Its purely for calculating best
1307 * by the routing protocol and for communicating with protocol peers.
1309 addattr32(&req
.n
, sizeof req
, RTA_PRIORITY
, NL_DEFAULT_ROUTE_METRIC
);
1311 /* Table corresponding to this route. */
1312 if (re
->table
< 256)
1313 req
.r
.rtm_table
= re
->table
;
1315 req
.r
.rtm_table
= RT_TABLE_UNSPEC
;
1316 addattr32(&req
.n
, sizeof req
, RTA_TABLE
, re
->table
);
1322 if (re
->mtu
|| re
->nexthop_mtu
) {
1323 char buf
[NL_PKT_BUF_SIZE
];
1324 struct rtattr
*rta
= (void *)buf
;
1325 u_int32_t mtu
= re
->mtu
;
1326 if (!mtu
|| (re
->nexthop_mtu
&& re
->nexthop_mtu
< mtu
))
1327 mtu
= re
->nexthop_mtu
;
1328 rta
->rta_type
= RTA_METRICS
;
1329 rta
->rta_len
= RTA_LENGTH(0);
1330 rta_addattr_l(rta
, NL_PKT_BUF_SIZE
, RTAX_MTU
, &mtu
, sizeof mtu
);
1331 addattr_l(&req
.n
, NL_PKT_BUF_SIZE
, RTA_METRICS
, RTA_DATA(rta
),
1335 /* Count overall nexthops so we can decide whether to use singlepath
1336 * or multipath case. */
1338 for (ALL_NEXTHOPS(re
->nexthop
, nexthop
)) {
1339 if (CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_RECURSIVE
))
1341 if (cmd
== RTM_NEWROUTE
1342 && !NEXTHOP_IS_ACTIVE(nexthop
->flags
))
1344 if (cmd
== RTM_DELROUTE
1345 && !CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_FIB
))
1351 /* Singlepath case. */
1352 if (nexthop_num
== 1 || multipath_num
== 1) {
1354 for (ALL_NEXTHOPS(re
->nexthop
, nexthop
)) {
1356 * So we want to cover 2 types of blackhole
1358 * 1) A normal blackhole route( ala from a static
1360 * 2) A recursively resolved blackhole route
1362 if (nexthop
->type
== NEXTHOP_TYPE_BLACKHOLE
) {
1363 switch (nexthop
->bh_type
) {
1364 case BLACKHOLE_ADMINPROHIB
:
1365 req
.r
.rtm_type
= RTN_PROHIBIT
;
1367 case BLACKHOLE_REJECT
:
1368 req
.r
.rtm_type
= RTN_UNREACHABLE
;
1371 req
.r
.rtm_type
= RTN_BLACKHOLE
;
1376 if (CHECK_FLAG(nexthop
->flags
,
1377 NEXTHOP_FLAG_RECURSIVE
)) {
1379 if (family
== AF_INET
) {
1380 if (nexthop
->rmap_src
.ipv4
1387 } else if (nexthop
->src
.ipv4
1395 } else if (family
== AF_INET6
) {
1396 if (!IN6_IS_ADDR_UNSPECIFIED(
1404 !IN6_IS_ADDR_UNSPECIFIED(
1417 if ((cmd
== RTM_NEWROUTE
1418 && NEXTHOP_IS_ACTIVE(nexthop
->flags
))
1419 || (cmd
== RTM_DELROUTE
1420 && CHECK_FLAG(nexthop
->flags
,
1421 NEXTHOP_FLAG_FIB
))) {
1422 routedesc
= nexthop
->rparent
1423 ? "recursive, single-path"
1426 _netlink_route_debug(cmd
, p
, nexthop
, routedesc
,
1428 _netlink_route_build_singlepath(
1429 routedesc
, bytelen
, nexthop
, &req
.n
,
1430 &req
.r
, sizeof req
, cmd
);
1435 if (setsrc
&& (cmd
== RTM_NEWROUTE
)) {
1436 if (family
== AF_INET
)
1437 addattr_l(&req
.n
, sizeof req
, RTA_PREFSRC
,
1438 &src
.ipv4
, bytelen
);
1439 else if (family
== AF_INET6
)
1440 addattr_l(&req
.n
, sizeof req
, RTA_PREFSRC
,
1441 &src
.ipv6
, bytelen
);
1444 char buf
[NL_PKT_BUF_SIZE
];
1445 struct rtattr
*rta
= (void *)buf
;
1446 struct rtnexthop
*rtnh
;
1447 union g_addr
*src1
= NULL
;
1449 rta
->rta_type
= RTA_MULTIPATH
;
1450 rta
->rta_len
= RTA_LENGTH(0);
1451 rtnh
= RTA_DATA(rta
);
1454 for (ALL_NEXTHOPS(re
->nexthop
, nexthop
)) {
1455 if (nexthop_num
>= multipath_num
)
1458 if (CHECK_FLAG(nexthop
->flags
,
1459 NEXTHOP_FLAG_RECURSIVE
)) {
1460 /* This only works for IPv4 now */
1462 if (family
== AF_INET
) {
1463 if (nexthop
->rmap_src
.ipv4
1470 } else if (nexthop
->src
.ipv4
1478 } else if (family
== AF_INET6
) {
1479 if (!IN6_IS_ADDR_UNSPECIFIED(
1487 !IN6_IS_ADDR_UNSPECIFIED(
1500 if ((cmd
== RTM_NEWROUTE
1501 && NEXTHOP_IS_ACTIVE(nexthop
->flags
))
1502 || (cmd
== RTM_DELROUTE
1503 && CHECK_FLAG(nexthop
->flags
,
1504 NEXTHOP_FLAG_FIB
))) {
1505 routedesc
= nexthop
->rparent
1506 ? "recursive, multipath"
1510 _netlink_route_debug(cmd
, p
, nexthop
, routedesc
,
1512 _netlink_route_build_multipath(
1513 routedesc
, bytelen
, nexthop
, rta
, rtnh
,
1515 rtnh
= RTNH_NEXT(rtnh
);
1517 if (!setsrc
&& src1
) {
1518 if (family
== AF_INET
)
1519 src
.ipv4
= src1
->ipv4
;
1520 else if (family
== AF_INET6
)
1521 src
.ipv6
= src1
->ipv6
;
1527 if (setsrc
&& (cmd
== RTM_NEWROUTE
)) {
1528 if (family
== AF_INET
)
1529 addattr_l(&req
.n
, sizeof req
, RTA_PREFSRC
,
1530 &src
.ipv4
, bytelen
);
1531 else if (family
== AF_INET6
)
1532 addattr_l(&req
.n
, sizeof req
, RTA_PREFSRC
,
1533 &src
.ipv6
, bytelen
);
1534 if (IS_ZEBRA_DEBUG_KERNEL
)
1535 zlog_debug("Setting source");
1538 if (rta
->rta_len
> RTA_LENGTH(0))
1539 addattr_l(&req
.n
, NL_PKT_BUF_SIZE
, RTA_MULTIPATH
,
1540 RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1543 /* If there is no useful nexthop then return. */
1544 if (nexthop_num
== 0) {
1545 if (IS_ZEBRA_DEBUG_KERNEL
)
1547 "netlink_route_multipath(): No useful nexthop.");
1553 /* Destination netlink address. */
1554 memset(&snl
, 0, sizeof snl
);
1555 snl
.nl_family
= AF_NETLINK
;
1557 /* Talk to netlink socket. */
1558 return netlink_talk(netlink_talk_filter
, &req
.n
, &zns
->netlink_cmd
, zns
,
/*
 * Query the kernel for one multicast route via netlink.
 *
 * `in` is cast to a struct mcast_route_data. The request is an RTM_GETROUTE
 * for family RTNL_FAMILY_IPMR carrying RTA_IIF and RTA_OIF (both taken from
 * mroute->ifindex), RTA_SRC and RTA_DST (the 4-byte source and group
 * addresses of mroute->sg) and RTA_TABLE (zvrf->table_id). The reply is
 * handed to netlink_route_change_read_multicast() through netlink_talk() on
 * the default namespace's command socket.
 *
 * NOTE(review): the declarations of req, mroute and suc, and the end of the
 * function (including its return statement), are not visible in this
 * excerpt.
 */
1562 int kernel_get_ipmr_sg_stats(struct zebra_vrf
*zvrf
, void *in
)
1565 struct mcast_route_data
*mr
= (struct mcast_route_data
*)in
;
1573 struct zebra_ns
*zns
= zebra_ns_lookup(NS_DEFAULT
);
1575 memset(&req
.n
, 0, sizeof(req
.n
));
1576 memset(&req
.ndm
, 0, sizeof(req
.ndm
));
1578 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ndmsg
));
1579 req
.n
.nlmsg_flags
= NLM_F_REQUEST
;
1580 req
.n
.nlmsg_pid
= zns
->netlink_cmd
.snl
.nl_pid
;
1582 req
.ndm
.ndm_family
= RTNL_FAMILY_IPMR
;
1583 req
.n
.nlmsg_type
= RTM_GETROUTE
;
1585 addattr_l(&req
.n
, sizeof(req
), RTA_IIF
, &mroute
->ifindex
, 4);
1586 addattr_l(&req
.n
, sizeof(req
), RTA_OIF
, &mroute
->ifindex
, 4);
1587 addattr_l(&req
.n
, sizeof(req
), RTA_SRC
, &mroute
->sg
.src
.s_addr
, 4);
1588 addattr_l(&req
.n
, sizeof(req
), RTA_DST
, &mroute
->sg
.grp
.s_addr
, 4);
1589 addattr_l(&req
.n
, sizeof(req
), RTA_TABLE
, &zvrf
->table_id
, 4);
1591 suc
= netlink_talk(netlink_route_change_read_multicast
, &req
.n
,
1592 &zns
->netlink_cmd
, zns
, 0);
/*
 * Push a route change for prefix p (with optional source prefix src_p) to
 * the kernel through netlink_route_multipath(). Three calls are visible:
 *   - RTM_NEWROUTE with `new` and update flag 0,
 *   - RTM_DELROUTE with `old` and update flag 0,
 *   - RTM_NEWROUTE with `new` and update flag 1.
 *
 * NOTE(review): the conditions that choose between these calls are not
 * visible in this excerpt; presumably they depend on which of old/new is
 * set - confirm against the full file.
 */
1598 int kernel_route_rib(struct prefix
*p
, struct prefix
*src_p
,
1599 struct route_entry
*old
, struct route_entry
*new)
1604 return netlink_route_multipath(RTM_NEWROUTE
, p
, src_p
, new, 0);
1606 return netlink_route_multipath(RTM_DELROUTE
, p
, src_p
, old
, 0);
1608 return netlink_route_multipath(RTM_NEWROUTE
, p
, src_p
, new, 1);
/*
 * Add or delete a kernel neighbor entry on interface ifindex by calling
 * netlink_neigh_update(): a non-zero `add` selects RTM_NEWNEIGH, zero
 * selects RTM_DELNEIGH.
 *
 * NOTE(review): the end of the parameter list and the remaining call
 * arguments are not visible in this excerpt.
 */
1611 int kernel_neigh_update(int add
, int ifindex
, uint32_t addr
, char *lla
,
1614 return netlink_neigh_update(add
? RTM_NEWNEIGH
: RTM_DELNEIGH
, ifindex
,
/*
 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
 */
/*
 * Build and send one bridge-FDB netlink request for interface ifp with
 * destination vtep_ip.
 *
 * The message uses family PF_BRIDGE, state NUD_NOARP | NUD_PERMANENT and
 * flag NTF_SELF. The all-zero dst_mac is sent as NDA_LLADDR and the 4-byte
 * vtep_ip->s_addr as NDA_DST; ndm_ifindex is ifp->ifindex. `cmd` becomes
 * nlmsg_type, and for RTM_NEWNEIGH the flags NLM_F_CREATE | NLM_F_APPEND
 * are added to NLM_F_REQUEST. Returns the result of netlink_talk() on the
 * default namespace's command socket.
 *
 * NOTE(review): the declaration of req and the trailing netlink_talk()
 * arguments are not visible in this excerpt.
 */
1622 static int netlink_vxlan_flood_list_update(struct interface
*ifp
,
1623 struct in_addr
*vtep_ip
, int cmd
)
1625 struct zebra_ns
*zns
= zebra_ns_lookup(NS_DEFAULT
);
1631 u_char dst_mac
[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1633 memset(&req
.n
, 0, sizeof(req
.n
));
1634 memset(&req
.ndm
, 0, sizeof(req
.ndm
));
1636 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ndmsg
));
1637 req
.n
.nlmsg_flags
= NLM_F_REQUEST
;
1638 if (cmd
== RTM_NEWNEIGH
)
1639 req
.n
.nlmsg_flags
|= (NLM_F_CREATE
| NLM_F_APPEND
);
1640 req
.n
.nlmsg_type
= cmd
;
1641 req
.ndm
.ndm_family
= PF_BRIDGE
;
1642 req
.ndm
.ndm_state
= NUD_NOARP
| NUD_PERMANENT
;
1643 req
.ndm
.ndm_flags
|= NTF_SELF
; // Handle by "self", not "master"
1646 addattr_l(&req
.n
, sizeof(req
), NDA_LLADDR
, &dst_mac
, 6);
1647 req
.ndm
.ndm_ifindex
= ifp
->ifindex
;
1648 addattr_l(&req
.n
, sizeof(req
), NDA_DST
, &vtep_ip
->s_addr
, 4);
1650 return netlink_talk(netlink_talk_filter
, &req
.n
, &zns
->netlink_cmd
, zns
,
1655 * Add remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1657 * a "flood" MAC FDB entry.
1659 int kernel_add_vtep(vni_t vni
, struct interface
*ifp
, struct in_addr
*vtep_ip
)
1661 if (IS_ZEBRA_DEBUG_VXLAN
)
1662 zlog_debug("Install %s into flood list for VNI %u intf %s(%u)",
1663 inet_ntoa(*vtep_ip
), vni
, ifp
->name
, ifp
->ifindex
);
1665 return netlink_vxlan_flood_list_update(ifp
, vtep_ip
, RTM_NEWNEIGH
);
1669 * Remove remote VTEP for this VxLAN interface (VNI). In Linux, this involves
1670 * deleting the "flood" MAC FDB entry.
1672 int kernel_del_vtep(vni_t vni
, struct interface
*ifp
, struct in_addr
*vtep_ip
)
1674 if (IS_ZEBRA_DEBUG_VXLAN
)
1676 "Uninstall %s from flood list for VNI %u intf %s(%u)",
1677 inet_ntoa(*vtep_ip
), vni
, ifp
->name
, ifp
->ifindex
);
1679 return netlink_vxlan_flood_list_update(ifp
, vtep_ip
, RTM_DELNEIGH
);
/*
 * NDA_RTA(r): address of the first rtattr that follows the struct ndmsg
 * header located at r (r advanced by the NLMSG_ALIGN'ed size of ndmsg).
 */
1683 #define NDA_RTA(r) \
1684 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
/*
 * Handle one bridge MAC FDB notification `h`.
 *
 * Steps visible in this excerpt:
 *   - only proceeds when is_evpn_enabled();
 *   - looks up the interface in the default namespace and requires it to
 *     have ->info, to be a bridge slave, and to have a bridge master
 *     (brslave_info.br_if), warning otherwise;
 *   - checks for NUD_PERMANENT entries;
 *   - parses the NDA_* attributes and requires an NDA_LLADDR whose payload
 *     is exactly ETH_ALEN bytes, which is copied into mac;
 *   - reads the VLAN id from NDA_VLAN when present and copies NDA_DST into
 *     an IPv4 host prefix (vtep_ip);
 *   - derives `sticky` from NUD_NOARP and compares vid with the file-level
 *     filter_vlan when that is non-zero;
 *   - for RTM_NEWNEIGH calls zebra_vxlan_check_del_local_mac() on VxLAN
 *     interfaces or zebra_vxlan_local_mac_add_update() otherwise; for the
 *     delete case calls zebra_vxlan_check_readd_remote_mac() on VxLAN
 *     interfaces or zebra_vxlan_local_mac_del() otherwise.
 *
 * NOTE(review): several lines of this function (some declarations, the
 * statements executed when a check fails, closing braces and call tails)
 * are missing from this excerpt, so the exact return values on the
 * early-exit paths cannot be stated here.
 * NOTE(review): vid_buf/dst_buf are filled with sprintf(); their sizes are
 * not visible here - confirm they cannot overflow.
 */
1687 static int netlink_macfdb_change(struct sockaddr_nl
*snl
, struct nlmsghdr
*h
,
1691 struct interface
*ifp
;
1692 struct zebra_if
*zif
;
1693 struct rtattr
*tb
[NDA_MAX
+ 1];
1694 struct interface
*br_if
;
1697 struct prefix vtep_ip
;
1698 int vid_present
= 0, dst_present
= 0;
1699 char buf
[ETHER_ADDR_STRLEN
];
1704 ndm
= NLMSG_DATA(h
);
1706 /* We only process macfdb notifications if EVPN is enabled */
1707 if (!is_evpn_enabled())
1710 /* The interface should exist. */
1711 ifp
= if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT
),
1713 if (!ifp
|| !ifp
->info
)
1716 /* The interface should be something we're interested in. */
1717 if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp
))
1720 /* Drop "permanent" entries. */
1721 if (ndm
->ndm_state
& NUD_PERMANENT
)
1724 zif
= (struct zebra_if
*)ifp
->info
;
1725 if ((br_if
= zif
->brslave_info
.br_if
) == NULL
) {
1726 zlog_warn("%s family %s IF %s(%u) brIF %u - no bridge master",
1727 nl_msg_type_to_str(h
->nlmsg_type
),
1728 nl_family_to_str(ndm
->ndm_family
), ifp
->name
,
1729 ndm
->ndm_ifindex
, zif
->brslave_info
.bridge_ifindex
);
1733 /* Parse attributes and extract fields of interest. */
1734 memset(tb
, 0, sizeof tb
);
1735 netlink_parse_rtattr(tb
, NDA_MAX
, NDA_RTA(ndm
), len
);
1737 if (!tb
[NDA_LLADDR
]) {
1738 zlog_warn("%s family %s IF %s(%u) brIF %u - no LLADDR",
1739 nl_msg_type_to_str(h
->nlmsg_type
),
1740 nl_family_to_str(ndm
->ndm_family
), ifp
->name
,
1741 ndm
->ndm_ifindex
, zif
->brslave_info
.bridge_ifindex
);
1745 if (RTA_PAYLOAD(tb
[NDA_LLADDR
]) != ETH_ALEN
) {
1747 "%s family %s IF %s(%u) brIF %u - LLADDR is not MAC, len %lu",
1748 nl_msg_type_to_str(h
->nlmsg_type
),
1749 nl_family_to_str(ndm
->ndm_family
), ifp
->name
,
1750 ndm
->ndm_ifindex
, zif
->brslave_info
.bridge_ifindex
,
1751 (unsigned long)RTA_PAYLOAD(tb
[NDA_LLADDR
]));
1755 memcpy(&mac
, RTA_DATA(tb
[NDA_LLADDR
]), ETH_ALEN
);
1757 if ((NDA_VLAN
<= NDA_MAX
) && tb
[NDA_VLAN
]) {
1759 vid
= *(u_int16_t
*)RTA_DATA(tb
[NDA_VLAN
]);
1760 sprintf(vid_buf
, " VLAN %u", vid
);
1764 /* TODO: Only IPv4 supported now. */
1766 vtep_ip
.family
= AF_INET
;
1767 vtep_ip
.prefixlen
= IPV4_MAX_BITLEN
;
1768 memcpy(&(vtep_ip
.u
.prefix4
.s_addr
), RTA_DATA(tb
[NDA_DST
]),
1770 sprintf(dst_buf
, " dst %s", inet_ntoa(vtep_ip
.u
.prefix4
));
1773 sticky
= (ndm
->ndm_state
& NUD_NOARP
) ? 1 : 0;
1775 if (IS_ZEBRA_DEBUG_KERNEL
)
1776 zlog_debug("Rx %s family %s IF %s(%u)%s %sMAC %s%s",
1777 nl_msg_type_to_str(h
->nlmsg_type
),
1778 nl_family_to_str(ndm
->ndm_family
), ifp
->name
,
1779 ndm
->ndm_ifindex
, vid_present
? vid_buf
: "",
1780 sticky
? "sticky " : "",
1781 prefix_mac2str(&mac
, buf
, sizeof(buf
)),
1782 dst_present
? dst_buf
: "");
1784 if (filter_vlan
&& vid
!= filter_vlan
)
1787 /* If add or update, do accordingly if learnt on a "local" interface; if
1788 * the notification is over VxLAN, this has to be related to
1790 * so perform an implicit delete of any local entry (if it exists).
1792 if (h
->nlmsg_type
== RTM_NEWNEIGH
) {
1793 /* Drop "permanent" entries. */
1794 if (ndm
->ndm_state
& NUD_PERMANENT
)
1797 if (IS_ZEBRA_IF_VXLAN(ifp
))
1798 return zebra_vxlan_check_del_local_mac(ifp
, br_if
, &mac
,
1801 return zebra_vxlan_local_mac_add_update(ifp
, br_if
, &mac
, vid
,
1805 /* This is a delete notification.
1806 * 1. For a MAC over VxLan, check if it needs to be refreshed(readded)
1807 * 2. For a MAC over "local" interface, delete the mac
1808 * Note: We will get notifications from both bridge driver and VxLAN
1810 * Ignore the notification from VxLan driver as it is also generated
1811 * when mac moves from remote to local.
1816 if (IS_ZEBRA_IF_VXLAN(ifp
))
1817 return zebra_vxlan_check_readd_remote_mac(ifp
, br_if
, &mac
,
1820 return zebra_vxlan_local_mac_del(ifp
, br_if
, &mac
, vid
);
// Per-message callback used when reading the MAC FDB (it is the callback
// handed to netlink_parse_info() by the macfdb read functions in this file).
// Visible steps: test that the message type is RTM_NEWNEIGH, compute the
// payload length left after the struct ndmsg header, test that the family
// is AF_BRIDGE, then forward the message to netlink_macfdb_change().
// NOTE(review): the local declarations and the statements taken when a
// check fails are not visible in this excerpt.
1823 static int netlink_macfdb_table(struct sockaddr_nl
*snl
, struct nlmsghdr
*h
,
1824 ns_id_t ns_id
, int startup
)
1829 if (h
->nlmsg_type
!= RTM_NEWNEIGH
)
1832 /* Length validity. */
1833 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct ndmsg
));
1837 /* We are interested only in AF_BRIDGE notifications. */
1838 ndm
= NLMSG_DATA(h
);
1839 if (ndm
->ndm_family
!= AF_BRIDGE
)
1842 return netlink_macfdb_change(snl
, h
, len
);
1845 /* Request for MAC FDB information from the kernel */
/*
 * Send a MAC FDB request of netlink message type `type` for address family
 * `family` on zns's command socket.
 *
 * The request is zeroed, sized for a struct ifinfomsg header, and carries
 * an IFLA_MASTER attribute holding master_ifindex. Returns the result of
 * netlink_request().
 *
 * NOTE(review): the definition of req and any condition guarding the
 * IFLA_MASTER attribute are not visible in this excerpt.
 */
1846 static int netlink_request_macs(struct zebra_ns
*zns
, int family
, int type
,
1847 ifindex_t master_ifindex
)
1851 struct ifinfomsg ifm
;
1855 /* Form the request, specifying filter (rtattr) if needed. */
1856 memset(&req
, 0, sizeof(req
));
1857 req
.n
.nlmsg_type
= type
;
1858 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifinfomsg
));
1859 req
.ifm
.ifi_family
= family
;
1861 addattr32(&req
.n
, sizeof(req
), IFLA_MASTER
, master_ifindex
);
1863 return netlink_request(&zns
->netlink_cmd
, &req
.n
);
1867 * MAC forwarding database read using netlink interface. This is invoked
/*
 * Read the whole bridge MAC FDB for namespace zns: request it with
 * netlink_request_macs(AF_BRIDGE, RTM_GETNEIGH, master ifindex 0) and then
 * parse the replies with netlink_macfdb_table() via netlink_parse_info().
 *
 * NOTE(review): the declaration of ret, the error handling between the two
 * calls and the return statement are not visible in this excerpt.
 */
1870 int netlink_macfdb_read(struct zebra_ns
*zns
)
1874 /* Get bridge FDB table. */
1875 ret
= netlink_request_macs(zns
, AF_BRIDGE
, RTM_GETNEIGH
, 0);
1878 /* We are reading entire table. */
1880 ret
= netlink_parse_info(netlink_macfdb_table
, &zns
->netlink_cmd
, zns
,
/*
 * MAC forwarding database read using netlink interface. This is for a
 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
 */
/*
 * Read the MAC FDB for one bridge (br_if) on behalf of interface ifp.
 *
 * When br_if's zebra_if is VLAN-aware, the file-level filter_vlan is set to
 * the access VLAN taken from ifp's VxLAN L2 info (zif->l2info.vxl). The FDB
 * is then requested with netlink_request_macs(AF_BRIDGE, RTM_GETNEIGH, ...)
 * and the replies are parsed with netlink_macfdb_table().
 *
 * NOTE(review): the declaration of ret, the trailing call arguments, the
 * filter reset announced by the last comment and the return statement are
 * not visible in this excerpt.
 */
1890 int netlink_macfdb_read_for_bridge(struct zebra_ns
*zns
, struct interface
*ifp
,
1891 struct interface
*br_if
)
1893 struct zebra_if
*br_zif
;
1894 struct zebra_if
*zif
;
1895 struct zebra_l2info_vxlan
*vxl
;
1899 /* Save VLAN we're filtering on, if needed. */
1900 br_zif
= (struct zebra_if
*)br_if
->info
;
1901 zif
= (struct zebra_if
*)ifp
->info
;
1902 vxl
= &zif
->l2info
.vxl
;
1903 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif
))
1904 filter_vlan
= vxl
->access_vlan
;
1906 /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
1908 ret
= netlink_request_macs(zns
, AF_BRIDGE
, RTM_GETNEIGH
,
1912 ret
= netlink_parse_info(netlink_macfdb_table
, &zns
->netlink_cmd
, zns
,
1915 /* Reset VLAN filter. */
/*
 * Build and send a bridge-FDB netlink request (cmd is RTM_NEWNEIGH or
 * RTM_DELNEIGH) for `mac` on interface ifp with remote VTEP vtep_ip.
 *
 * Visible behaviour:
 *   - warns "no mapping to bridge" when ifp has no bridge master;
 *   - family AF_BRIDGE, ndm_flags NTF_SELF | NTF_MASTER, base state
 *     NUD_REACHABLE; NUD_NOARP and NTF_EXT_LEARNED are OR'ed in on paths
 *     whose conditions are not visible here;
 *   - RTM_NEWNEIGH adds NLM_F_CREATE | NLM_F_REPLACE to NLM_F_REQUEST;
 *   - attributes: NDA_LLADDR (6-byte mac), NDA_DST (4-byte vtep_ip),
 *     NDA_VLAN (only when the bridge is VLAN-aware and vid > 0) and
 *     NDA_MASTER (br_if->ifindex);
 *   - logs the request when kernel debugging is on and returns the result
 *     of netlink_talk().
 *
 * NOTE(review): the declarations of req, dst_alen, vid_buf and dst_buf, the
 * initialisation of zif, and several conditions are missing from this
 * excerpt; how `local` and `sticky` influence the flags cannot be confirmed
 * from the visible lines.
 */
1920 static int netlink_macfdb_update(struct interface
*ifp
, vlanid_t vid
,
1921 struct ethaddr
*mac
, struct in_addr vtep_ip
,
1922 int local
, int cmd
, u_char sticky
)
1924 struct zebra_ns
*zns
= zebra_ns_lookup(NS_DEFAULT
);
1931 struct zebra_if
*zif
;
1932 struct interface
*br_if
;
1933 struct zebra_if
*br_zif
;
1934 char buf
[ETHER_ADDR_STRLEN
];
1935 int vid_present
= 0, dst_present
= 0;
1940 if ((br_if
= zif
->brslave_info
.br_if
) == NULL
) {
1941 zlog_warn("MAC %s on IF %s(%u) - no mapping to bridge",
1942 (cmd
== RTM_NEWNEIGH
) ? "add" : "del", ifp
->name
,
1947 memset(&req
.n
, 0, sizeof(req
.n
));
1948 memset(&req
.ndm
, 0, sizeof(req
.ndm
));
1950 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ndmsg
));
1951 req
.n
.nlmsg_flags
= NLM_F_REQUEST
;
1952 if (cmd
== RTM_NEWNEIGH
)
1953 req
.n
.nlmsg_flags
|= (NLM_F_CREATE
| NLM_F_REPLACE
);
1954 req
.n
.nlmsg_type
= cmd
;
1955 req
.ndm
.ndm_family
= AF_BRIDGE
;
1956 req
.ndm
.ndm_flags
|= NTF_SELF
| NTF_MASTER
;
1957 req
.ndm
.ndm_state
= NUD_REACHABLE
;
1960 req
.ndm
.ndm_state
|= NUD_NOARP
;
1962 req
.ndm
.ndm_flags
|= NTF_EXT_LEARNED
;
1964 addattr_l(&req
.n
, sizeof(req
), NDA_LLADDR
, mac
, 6);
1965 req
.ndm
.ndm_ifindex
= ifp
->ifindex
;
1967 dst_alen
= 4; // TODO: hardcoded
1968 addattr_l(&req
.n
, sizeof(req
), NDA_DST
, &vtep_ip
, dst_alen
);
1970 sprintf(dst_buf
, " dst %s", inet_ntoa(vtep_ip
));
1972 br_zif
= (struct zebra_if
*)br_if
->info
;
1973 if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif
) && vid
> 0) {
1974 addattr16(&req
.n
, sizeof(req
), NDA_VLAN
, vid
);
1976 sprintf(vid_buf
, " VLAN %u", vid
);
1978 addattr32(&req
.n
, sizeof(req
), NDA_MASTER
, br_if
->ifindex
);
1980 if (IS_ZEBRA_DEBUG_KERNEL
)
1981 zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s%s",
1982 nl_msg_type_to_str(cmd
),
1983 nl_family_to_str(req
.ndm
.ndm_family
), ifp
->name
,
1984 ifp
->ifindex
, vid_present
? vid_buf
: "",
1985 sticky
? "sticky " : "",
1986 prefix_mac2str(mac
, buf
, sizeof(buf
)),
1987 dst_present
? dst_buf
: "");
1989 return netlink_talk(netlink_talk_filter
, &req
.n
, &zns
->netlink_cmd
, zns
,
1994 (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE \
/*
 * Handle one IP neighbor (ARP/ND) notification `h`.
 *
 * Steps visible in this excerpt:
 *   - only proceeds when is_evpn_enabled();
 *   - looks up the interface in the default namespace and requires ->info;
 *   - checks for NUD_PERMANENT entries;
 *   - determines link_if from the kind of interface (VLAN interface or
 *     bridge);
 *   - parses the NDA_* attributes, warns "no DST" when the destination is
 *     absent, and copies NDA_DST into `ip`, typed IPADDR_V4 for AF_INET and
 *     IPADDR_V6 otherwise;
 *   - for RTM_NEWNEIGH: an NDA_LLADDR, when present, must be ETH_ALEN bytes
 *     and is copied into mac; ext_learned is taken from NTF_EXT_LEARNED;
 *     if the state has any NUD_VALID bit the entry goes to
 *     zebra_vxlan_local_neigh_add_update(), otherwise to
 *     zebra_vxlan_local_neigh_del();
 *   - for other message types: logs and calls
 *     zebra_vxlan_local_neigh_del().
 *
 * NOTE(review): several lines (declarations, statements executed when a
 * check fails, closing braces, call tails) are missing from this excerpt,
 * so early-exit return values cannot be stated here.
 * NOTE(review): the memcpy into ip.ip.addr uses the attribute's own payload
 * length; no visible line bounds it by the destination size - confirm.
 */
1997 static int netlink_ipneigh_change(struct sockaddr_nl
*snl
, struct nlmsghdr
*h
,
2001 struct interface
*ifp
;
2002 struct zebra_if
*zif
;
2003 struct rtattr
*tb
[NDA_MAX
+ 1];
2004 struct interface
*link_if
;
2007 char buf
[ETHER_ADDR_STRLEN
];
2008 char buf2
[INET6_ADDRSTRLEN
];
2009 int mac_present
= 0;
2012 ndm
= NLMSG_DATA(h
);
2014 /* We only process neigh notifications if EVPN is enabled */
2015 if (!is_evpn_enabled())
2018 /* The interface should exist. */
2019 ifp
= if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT
),
2021 if (!ifp
|| !ifp
->info
)
2024 /* Drop "permanent" entries. */
2025 if (ndm
->ndm_state
& NUD_PERMANENT
)
2028 zif
= (struct zebra_if
*)ifp
->info
;
2029 /* The neighbor is present on an SVI. From this, we locate the
2031 * bridge because we're only interested in neighbors on a VxLAN bridge.
2032 * The bridge is located based on the nature of the SVI:
2033 * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
2035 * and is linked to the bridge
2036 * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
2040 if (IS_ZEBRA_IF_VLAN(ifp
)) {
2041 link_if
= if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT
),
2045 } else if (IS_ZEBRA_IF_BRIDGE(ifp
))
2050 /* Parse attributes and extract fields of interest. */
2051 memset(tb
, 0, sizeof tb
);
2052 netlink_parse_rtattr(tb
, NDA_MAX
, NDA_RTA(ndm
), len
);
2055 zlog_warn("%s family %s IF %s(%u) - no DST",
2056 nl_msg_type_to_str(h
->nlmsg_type
),
2057 nl_family_to_str(ndm
->ndm_family
), ifp
->name
,
2061 memset(&mac
, 0, sizeof(struct ethaddr
));
2062 memset(&ip
, 0, sizeof(struct ipaddr
));
2063 ip
.ipa_type
= (ndm
->ndm_family
== AF_INET
) ? IPADDR_V4
: IPADDR_V6
;
2064 memcpy(&ip
.ip
.addr
, RTA_DATA(tb
[NDA_DST
]), RTA_PAYLOAD(tb
[NDA_DST
]));
2066 if (h
->nlmsg_type
== RTM_NEWNEIGH
) {
2067 if (tb
[NDA_LLADDR
]) {
2068 if (RTA_PAYLOAD(tb
[NDA_LLADDR
]) != ETH_ALEN
) {
2070 "%s family %s IF %s(%u) - LLADDR is not MAC, len %lu",
2071 nl_msg_type_to_str(h
->nlmsg_type
),
2072 nl_family_to_str(ndm
->ndm_family
),
2073 ifp
->name
, ndm
->ndm_ifindex
,
2074 (unsigned long)RTA_PAYLOAD(tb
[NDA_LLADDR
]));
2079 memcpy(&mac
, RTA_DATA(tb
[NDA_LLADDR
]), ETH_ALEN
);
2082 ext_learned
= (ndm
->ndm_flags
& NTF_EXT_LEARNED
) ? 1 : 0;
2084 if (IS_ZEBRA_DEBUG_KERNEL
)
2086 "Rx %s family %s IF %s(%u) IP %s MAC %s state 0x%x flags 0x%x",
2087 nl_msg_type_to_str(h
->nlmsg_type
),
2088 nl_family_to_str(ndm
->ndm_family
), ifp
->name
,
2090 ipaddr2str(&ip
, buf2
, sizeof(buf2
)),
2092 ? prefix_mac2str(&mac
, buf
, sizeof(buf
))
2094 ndm
->ndm_state
, ndm
->ndm_flags
);
2096 /* If the neighbor state is valid for use, process as an add or
2098 * else process as a delete. Note that the delete handling may
2100 * in re-adding the neighbor if it is a valid "remote" neighbor.
2102 if (ndm
->ndm_state
& NUD_VALID
)
2103 return zebra_vxlan_local_neigh_add_update(
2104 ifp
, link_if
, &ip
, &mac
, ndm
->ndm_state
,
2107 return zebra_vxlan_local_neigh_del(ifp
, link_if
, &ip
);
2110 if (IS_ZEBRA_DEBUG_KERNEL
)
2111 zlog_debug("Rx %s family %s IF %s(%u) IP %s",
2112 nl_msg_type_to_str(h
->nlmsg_type
),
2113 nl_family_to_str(ndm
->ndm_family
), ifp
->name
,
2115 ipaddr2str(&ip
, buf2
, sizeof(buf2
)));
2117 /* Process the delete - it may result in re-adding the neighbor if it is
2118 * a valid "remote" neighbor.
2120 return zebra_vxlan_local_neigh_del(ifp
, link_if
, &ip
);
// Per-message callback used when reading the IP neighbor table (it is the
// callback handed to netlink_parse_info() by the neighbor read functions in
// this file). Visible steps: test that the message type is RTM_NEWNEIGH,
// compute the payload length left after the struct ndmsg header, test that
// the family is AF_INET or AF_INET6, then forward the message to
// netlink_neigh_change().
// NOTE(review): the local declarations and the statements taken when a
// check fails are not visible in this excerpt.
2123 static int netlink_neigh_table(struct sockaddr_nl
*snl
, struct nlmsghdr
*h
,
2124 ns_id_t ns_id
, int startup
)
2129 if (h
->nlmsg_type
!= RTM_NEWNEIGH
)
2132 /* Length validity. */
2133 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct ndmsg
));
2137 /* We are interested only in AF_INET or AF_INET6 notifications. */
2138 ndm
= NLMSG_DATA(h
);
2139 if (ndm
->ndm_family
!= AF_INET
&& ndm
->ndm_family
!= AF_INET6
)
2142 return netlink_neigh_change(snl
, h
, len
);
2145 /* Request for IP neighbor information from the kernel */
/*
 * Send an IP neighbor request of netlink message type `type` for address
 * family `family` on zns's command socket.
 *
 * The request is zeroed, sized for a struct ndmsg header, and carries an
 * NDA_IFINDEX attribute holding ifindex. Returns the result of
 * netlink_request().
 *
 * NOTE(review): the rest of the parameter list, the definition of req and
 * any condition guarding the NDA_IFINDEX attribute are not visible in this
 * excerpt.
 */
2146 static int netlink_request_neigh(struct zebra_ns
*zns
, int family
, int type
,
2155 /* Form the request, specifying filter (rtattr) if needed. */
2156 memset(&req
, 0, sizeof(req
));
2157 req
.n
.nlmsg_type
= type
;
2158 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ndmsg
));
2159 req
.ndm
.ndm_family
= family
;
2161 addattr32(&req
.n
, sizeof(req
), NDA_IFINDEX
, ifindex
);
2163 return netlink_request(&zns
->netlink_cmd
, &req
.n
);
2167 * IP Neighbor table read using netlink interface. This is invoked
/*
 * Read the whole IP neighbor table for namespace zns: request it with
 * netlink_request_neigh(AF_UNSPEC, RTM_GETNEIGH, ifindex 0) and then parse
 * the replies with netlink_neigh_table() via netlink_parse_info().
 *
 * NOTE(review): the declaration of ret, the error handling between the two
 * calls and the return statement are not visible in this excerpt.
 */
2170 int netlink_neigh_read(struct zebra_ns
*zns
)
2174 /* Get IP neighbor table. */
2175 ret
= netlink_request_neigh(zns
, AF_UNSPEC
, RTM_GETNEIGH
, 0);
2178 ret
= netlink_parse_info(netlink_neigh_table
, &zns
->netlink_cmd
, zns
, 0,
2185 * IP Neighbor table read using netlink interface. This is for a specific
/*
 * Read the IP neighbor table on behalf of vlan_if: request it with
 * netlink_request_neigh(AF_UNSPEC, RTM_GETNEIGH, ...) and parse the replies
 * with netlink_neigh_table() via netlink_parse_info().
 *
 * NOTE(review): the ifindex argument of the request (presumably
 * vlan_if->ifindex - confirm), the declaration of ret and the return
 * statement are not visible in this excerpt.
 */
2188 int netlink_neigh_read_for_vlan(struct zebra_ns
*zns
, struct interface
*vlan_if
)
2192 ret
= netlink_request_neigh(zns
, AF_UNSPEC
, RTM_GETNEIGH
,
2196 ret
= netlink_parse_info(netlink_neigh_table
, &zns
->netlink_cmd
, zns
, 0,
/*
 * Dispatch one neighbor notification `h` to the right handler.
 *
 * Visible steps: test that the message type is RTM_NEWNEIGH or
 * RTM_DELNEIGH; compute the payload length left after the struct ndmsg
 * header; hand AF_BRIDGE messages to netlink_macfdb_change(); test that
 * ndm_type is RTN_UNICAST; hand AF_INET / AF_INET6 messages to
 * netlink_ipneigh_change().
 *
 * NOTE(review): the rest of the parameter list, the local declarations,
 * the statements taken when a check fails and the final return are not
 * visible in this excerpt.
 */
2202 int netlink_neigh_change(struct sockaddr_nl
*snl
, struct nlmsghdr
*h
,
2208 if (!(h
->nlmsg_type
== RTM_NEWNEIGH
|| h
->nlmsg_type
== RTM_DELNEIGH
))
2211 /* Length validity. */
2212 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct ndmsg
));
2216 /* Is this a notification for the MAC FDB or IP neighbor table? */
2217 ndm
= NLMSG_DATA(h
);
2218 if (ndm
->ndm_family
== AF_BRIDGE
)
2219 return netlink_macfdb_change(snl
, h
, len
);
2221 if (ndm
->ndm_type
!= RTN_UNICAST
)
2224 if (ndm
->ndm_family
== AF_INET
|| ndm
->ndm_family
== AF_INET6
)
2225 return netlink_ipneigh_change(snl
, h
, len
);
/*
 * Build and send an IP neighbor netlink request (cmd is RTM_NEWNEIGH or
 * RTM_DELNEIGH) for address `ip` on interface ifp.
 *
 * Visible behaviour:
 *   - RTM_NEWNEIGH adds NLM_F_CREATE | NLM_F_REPLACE to NLM_F_REQUEST;
 *   - ndm_family is AF_INET or AF_INET6 according to IS_IPADDR_V4(ip);
 *   - ndm_state is set from `flags`, ndm_type to RTN_UNICAST and ndm_flags
 *     to NTF_EXT_LEARNED;
 *   - NDA_DST carries the address (IPV4_MAX_BYTELEN or IPV6_MAX_BYTELEN
 *     bytes) and NDA_LLADDR carries the 6-byte mac;
 *   - logs the request when kernel debugging is on and returns the result
 *     of netlink_talk().
 *
 * NOTE(review): the declarations of req and ipa_len and any guard around
 * the NDA_LLADDR attribute are missing from this excerpt. kernel_del_neigh()
 * in this file passes mac == NULL, so such a guard presumably exists -
 * confirm against the full file.
 */
2230 static int netlink_neigh_update2(struct interface
*ifp
, struct ipaddr
*ip
,
2231 struct ethaddr
*mac
, u_int32_t flags
, int cmd
)
2240 struct zebra_ns
*zns
= zebra_ns_lookup(NS_DEFAULT
);
2241 char buf
[INET6_ADDRSTRLEN
];
2242 char buf2
[ETHER_ADDR_STRLEN
];
2244 memset(&req
.n
, 0, sizeof(req
.n
));
2245 memset(&req
.ndm
, 0, sizeof(req
.ndm
));
2247 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ndmsg
));
2248 req
.n
.nlmsg_flags
= NLM_F_REQUEST
;
2249 if (cmd
== RTM_NEWNEIGH
)
2250 req
.n
.nlmsg_flags
|= (NLM_F_CREATE
| NLM_F_REPLACE
);
2251 req
.n
.nlmsg_type
= cmd
; // RTM_NEWNEIGH or RTM_DELNEIGH
2252 req
.ndm
.ndm_family
= IS_IPADDR_V4(ip
) ? AF_INET
: AF_INET6
;
2253 req
.ndm
.ndm_state
= flags
;
2254 req
.ndm
.ndm_ifindex
= ifp
->ifindex
;
2255 req
.ndm
.ndm_type
= RTN_UNICAST
;
2256 req
.ndm
.ndm_flags
= NTF_EXT_LEARNED
;
2259 ipa_len
= IS_IPADDR_V4(ip
) ? IPV4_MAX_BYTELEN
: IPV6_MAX_BYTELEN
;
2260 addattr_l(&req
.n
, sizeof(req
), NDA_DST
, &ip
->ip
.addr
, ipa_len
);
2262 addattr_l(&req
.n
, sizeof(req
), NDA_LLADDR
, mac
, 6);
2264 if (IS_ZEBRA_DEBUG_KERNEL
)
2265 zlog_debug("Tx %s family %s IF %s(%u) Neigh %s MAC %s",
2266 nl_msg_type_to_str(cmd
),
2267 nl_family_to_str(req
.ndm
.ndm_family
), ifp
->name
,
2268 ifp
->ifindex
, ipaddr2str(ip
, buf
, sizeof(buf
)),
2269 mac
? prefix_mac2str(mac
, buf2
, sizeof(buf2
))
2272 return netlink_talk(netlink_talk_filter
, &req
.n
, &zns
->netlink_cmd
, zns
,
2276 int kernel_add_mac(struct interface
*ifp
, vlanid_t vid
, struct ethaddr
*mac
,
2277 struct in_addr vtep_ip
, u_char sticky
)
2279 return netlink_macfdb_update(ifp
, vid
, mac
, vtep_ip
, 0, RTM_NEWNEIGH
,
2283 int kernel_del_mac(struct interface
*ifp
, vlanid_t vid
, struct ethaddr
*mac
,
2284 struct in_addr vtep_ip
, int local
)
2286 return netlink_macfdb_update(ifp
, vid
, mac
, vtep_ip
, local
,
2290 int kernel_add_neigh(struct interface
*ifp
, struct ipaddr
*ip
,
2291 struct ethaddr
*mac
)
2293 return netlink_neigh_update2(ifp
, ip
, mac
, NUD_REACHABLE
, RTM_NEWNEIGH
);
2296 int kernel_del_neigh(struct interface
*ifp
, struct ipaddr
*ip
)
2298 return netlink_neigh_update2(ifp
, ip
, NULL
, 0, RTM_DELNEIGH
);
/*
 * MPLS label forwarding table change via netlink interface.
 */
/*
 * Install or remove (cmd is RTM_NEWROUTE or RTM_DELROUTE) the kernel MPLS
 * route for the LSP `lsp`.
 *
 * Visible behaviour:
 *   - walks lsp->nhlfe_list to count usable NHLFEs: SELECTED + ACTIVE ones
 *     for RTM_NEWROUTE, INSTALLED + FIB ones otherwise;
 *   - takes an early exit when there are none, or when there is no
 *     best_nhlfe and cmd is not RTM_DELROUTE;
 *   - builds an rtmsg request with family AF_MPLS, table RT_TABLE_MAIN,
 *     destination length MPLS_LABEL_LEN_BITS, scope RT_SCOPE_UNIVERSE and
 *     type RTN_UNICAST; RTM_NEWROUTE additionally sets NLM_F_REPLACE and the
 *     protocol derived from lsp->best_nhlfe->type;
 *   - encodes lsp->ile.in_label as RTA_DST;
 *   - single-path case (nexthop_num == 1 or multipath_num == 1): builds the
 *     nexthop with _netlink_mpls_build_singlepath(); otherwise builds an
 *     RTA_MULTIPATH attribute with _netlink_mpls_build_multipath(), bounded
 *     by multipath_num;
 *   - sets (RTM_NEWROUTE) or clears (otherwise) NHLFE_FLAG_INSTALLED on each
 *     NHLFE it emits, together with a nexthop flag whose name is cut off in
 *     this excerpt;
 *   - returns the result of netlink_talk() on the default namespace's
 *     command socket.
 *
 * NOTE(review): many lines of this function are missing from this excerpt
 * (the req definition, counter updates, loop exits, closing braces and call
 * tails), so the early-exit return value and the exact loop termination
 * cannot be stated here.
 * NOTE(review): the RTA_MULTIPATH addattr_l() call passes NL_PKT_BUF_SIZE as
 * the maximum length while the RTA_DST call passes sizeof req - confirm
 * which bound is intended.
 */
2304 int netlink_mpls_multipath(int cmd
, zebra_lsp_t
*lsp
)
2307 zebra_nhlfe_t
*nhlfe
;
2308 struct nexthop
*nexthop
= NULL
;
2309 unsigned int nexthop_num
;
2310 const char *routedesc
;
2311 struct zebra_ns
*zns
= zebra_ns_lookup(NS_DEFAULT
);
2317 char buf
[NL_PKT_BUF_SIZE
];
2320 memset(&req
, 0, sizeof req
- NL_PKT_BUF_SIZE
);
2323 * Count # nexthops so we can decide whether to use singlepath
2324 * or multipath case.
2327 for (nhlfe
= lsp
->nhlfe_list
; nhlfe
; nhlfe
= nhlfe
->next
) {
2328 nexthop
= nhlfe
->nexthop
;
2331 if (cmd
== RTM_NEWROUTE
) {
2332 /* Count all selected NHLFEs */
2333 if (CHECK_FLAG(nhlfe
->flags
, NHLFE_FLAG_SELECTED
)
2334 && CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_ACTIVE
))
2338 /* Count all installed NHLFEs */
2339 if (CHECK_FLAG(nhlfe
->flags
, NHLFE_FLAG_INSTALLED
)
2340 && CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_FIB
))
2345 if ((nexthop_num
== 0) || (!lsp
->best_nhlfe
&& (cmd
!= RTM_DELROUTE
)))
2348 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2349 req
.n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
2350 req
.n
.nlmsg_type
= cmd
;
2351 req
.n
.nlmsg_pid
= zns
->netlink_cmd
.snl
.nl_pid
;
2353 req
.r
.rtm_family
= AF_MPLS
;
2354 req
.r
.rtm_table
= RT_TABLE_MAIN
;
2355 req
.r
.rtm_dst_len
= MPLS_LABEL_LEN_BITS
;
2356 req
.r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
2357 req
.r
.rtm_type
= RTN_UNICAST
;
2359 if (cmd
== RTM_NEWROUTE
) {
2360 /* We do a replace to handle update. */
2361 req
.n
.nlmsg_flags
|= NLM_F_REPLACE
;
2363 /* set the protocol value if installing */
2364 route_type
= re_type_from_lsp_type(lsp
->best_nhlfe
->type
);
2365 req
.r
.rtm_protocol
= zebra2proto(route_type
);
2368 /* Fill destination */
2369 lse
= mpls_lse_encode(lsp
->ile
.in_label
, 0, 0, 1);
2370 addattr_l(&req
.n
, sizeof req
, RTA_DST
, &lse
, sizeof(mpls_lse_t
));
2372 /* Fill nexthops (paths) based on single-path or multipath. The paths
2373 * chosen depend on the operation.
2375 if (nexthop_num
== 1 || multipath_num
== 1) {
2376 routedesc
= "single-path";
2377 _netlink_mpls_debug(cmd
, lsp
->ile
.in_label
, routedesc
);
2380 for (nhlfe
= lsp
->nhlfe_list
; nhlfe
; nhlfe
= nhlfe
->next
) {
2381 nexthop
= nhlfe
->nexthop
;
2385 if ((cmd
== RTM_NEWROUTE
2386 && (CHECK_FLAG(nhlfe
->flags
, NHLFE_FLAG_SELECTED
)
2387 && CHECK_FLAG(nexthop
->flags
,
2388 NEXTHOP_FLAG_ACTIVE
)))
2389 || (cmd
== RTM_DELROUTE
2390 && (CHECK_FLAG(nhlfe
->flags
,
2391 NHLFE_FLAG_INSTALLED
)
2392 && CHECK_FLAG(nexthop
->flags
,
2393 NEXTHOP_FLAG_FIB
)))) {
2394 /* Add the gateway */
2395 _netlink_mpls_build_singlepath(routedesc
, nhlfe
,
2398 if (cmd
== RTM_NEWROUTE
) {
2399 SET_FLAG(nhlfe
->flags
,
2400 NHLFE_FLAG_INSTALLED
);
2401 SET_FLAG(nexthop
->flags
,
2404 UNSET_FLAG(nhlfe
->flags
,
2405 NHLFE_FLAG_INSTALLED
);
2406 UNSET_FLAG(nexthop
->flags
,
2413 } else /* Multipath case */
2415 char buf
[NL_PKT_BUF_SIZE
];
2416 struct rtattr
*rta
= (void *)buf
;
2417 struct rtnexthop
*rtnh
;
2418 union g_addr
*src1
= NULL
;
2420 rta
->rta_type
= RTA_MULTIPATH
;
2421 rta
->rta_len
= RTA_LENGTH(0);
2422 rtnh
= RTA_DATA(rta
);
2424 routedesc
= "multipath";
2425 _netlink_mpls_debug(cmd
, lsp
->ile
.in_label
, routedesc
);
2428 for (nhlfe
= lsp
->nhlfe_list
; nhlfe
; nhlfe
= nhlfe
->next
) {
2429 nexthop
= nhlfe
->nexthop
;
2433 if (nexthop_num
>= multipath_num
)
2436 if ((cmd
== RTM_NEWROUTE
2437 && (CHECK_FLAG(nhlfe
->flags
, NHLFE_FLAG_SELECTED
)
2438 && CHECK_FLAG(nexthop
->flags
,
2439 NEXTHOP_FLAG_ACTIVE
)))
2440 || (cmd
== RTM_DELROUTE
2441 && (CHECK_FLAG(nhlfe
->flags
,
2442 NHLFE_FLAG_INSTALLED
)
2443 && CHECK_FLAG(nexthop
->flags
,
2444 NEXTHOP_FLAG_FIB
)))) {
2447 /* Build the multipath */
2448 _netlink_mpls_build_multipath(routedesc
, nhlfe
,
2451 rtnh
= RTNH_NEXT(rtnh
);
2453 if (cmd
== RTM_NEWROUTE
) {
2454 SET_FLAG(nhlfe
->flags
,
2455 NHLFE_FLAG_INSTALLED
);
2456 SET_FLAG(nexthop
->flags
,
2459 UNSET_FLAG(nhlfe
->flags
,
2460 NHLFE_FLAG_INSTALLED
);
2461 UNSET_FLAG(nexthop
->flags
,
2467 /* Add the multipath */
2468 if (rta
->rta_len
> RTA_LENGTH(0))
2469 addattr_l(&req
.n
, NL_PKT_BUF_SIZE
, RTA_MULTIPATH
,
2470 RTA_DATA(rta
), RTA_PAYLOAD(rta
));
2473 /* Talk to netlink socket. */
2474 return netlink_talk(netlink_talk_filter
, &req
.n
, &zns
->netlink_cmd
, zns
,
2477 #endif /* HAVE_NETLINK */