git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm_netlink.c
Merge pull request #7220 from idryzhov/fix-clear-isis
[mirror_frr.git] / zebra / zebra_fpm_netlink.c
1 /*
2 * Code for encoding/decoding FPM messages that are in netlink format.
3 *
4 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
5 * Copyright (C) 2012 by Open Source Routing.
6 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
7 *
8 * This file is part of GNU Zebra.
9 *
10 * GNU Zebra is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2, or (at your option) any
13 * later version.
14 *
15 * GNU Zebra is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; see the file COPYING; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <zebra.h>
26
27 #ifdef HAVE_NETLINK
28
29 #include "log.h"
30 #include "rib.h"
31 #include "vty.h"
32 #include "prefix.h"
33
34 #include "zebra/zserv.h"
35 #include "zebra/zebra_router.h"
36 #include "zebra/zebra_dplane.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/kernel_netlink.h"
40 #include "zebra/rt_netlink.h"
41 #include "nexthop.h"
42
43 #include "zebra/zebra_fpm_private.h"
44 #include "zebra/zebra_vxlan_private.h"
45
46 /*
47 * addr_to_a
48 *
49 * Returns string representation of an address of the given AF.
50 */
51 static inline const char *addr_to_a(uint8_t af, void *addr)
52 {
53 if (!addr)
54 return "<No address>";
55
56 switch (af) {
57
58 case AF_INET:
59 return inet_ntoa(*((struct in_addr *)addr));
60 case AF_INET6:
61 return inet6_ntoa(*((struct in6_addr *)addr));
62 default:
63 return "<Addr in unknown AF>";
64 }
65 }
66
67 /*
68 * prefix_addr_to_a
69 *
70 * Convience wrapper that returns a human-readable string for the
71 * address in a prefix.
72 */
73 static const char *prefix_addr_to_a(struct prefix *prefix)
74 {
75 if (!prefix)
76 return "<No address>";
77
78 return addr_to_a(prefix->family, &prefix->u.prefix);
79 }
80
/*
 * af_addr_size
 *
 * Number of bytes in an address of the given address family: 4 for
 * AF_INET and 16 for AF_INET6.
 *
 * Any other family trips an assertion; if that assertion does not stop
 * the program, the IPv6 size is returned.
 */
static size_t af_addr_size(uint8_t af)
{
	if (af == AF_INET)
		return 4;

	/* Only IPv4 and IPv6 are expected here. */
	if (af != AF_INET6)
		assert(0);

	return 16;
}
99
/*
 * Nexthop encapsulation types understood by the FPM netlink encoder.
 *
 * We plan to use RTA_ENCAP_TYPE attribute for VxLAN encap as well.
 * Currently, values 0 to 8 for this attribute are used by
 * lwtunnel_encap_types, so we cannot use these values for VxLAN encap.
 */
enum fpm_nh_encap_type_t {
	/* Nexthop carries no encapsulation; no encap attributes are emitted. */
	FPM_NH_ENCAP_NONE = 0,
	/* VxLAN encapsulation; this value is sent as RTA_ENCAP_TYPE. */
	FPM_NH_ENCAP_VXLAN = 100,
	/* Upper bound marker; not a valid encapsulation type. */
	FPM_NH_ENCAP_MAX,
};
110
111 /*
112 * fpm_nh_encap_type_to_str
113 */
114 static const char *fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type)
115 {
116 switch (encap_type) {
117 case FPM_NH_ENCAP_NONE:
118 return "none";
119
120 case FPM_NH_ENCAP_VXLAN:
121 return "VxLAN";
122
123 case FPM_NH_ENCAP_MAX:
124 return "invalid";
125 }
126
127 return "invalid";
128 }
129
/*
 * VxLAN encapsulation parameters for a nexthop. The VNI is what the
 * encoder writes as the VXLAN_VNI attribute nested inside RTA_ENCAP.
 */
struct vxlan_encap_info_t {
	vni_t vni;
};
133
/*
 * Attribute types used inside the RTA_ENCAP nest when the encapsulation
 * type is VxLAN.
 */
enum vxlan_encap_info_type_t {
	/* 32-bit VNI value. */
	VXLAN_VNI = 0,
};
137
/*
 * Encapsulation information attached to a nexthop: the encap type plus
 * the type-specific parameters. vxlan_encap is meaningful when encap_type
 * is FPM_NH_ENCAP_VXLAN.
 */
struct fpm_nh_encap_info_t {
	enum fpm_nh_encap_type_t encap_type;
	union {
		struct vxlan_encap_info_t vxlan_encap;
	};
};
144
/*
 * netlink_nh_info
 *
 * Holds information about a single nexthop for netlink. These info
 * structures are transient and may contain pointers into rib
 * data structures for convenience.
 */
struct netlink_nh_info {
	/* Outgoing interface index; 0 means none. */
	uint32_t if_index;
	/* Gateway address, pointing into the source nexthop; NULL if none. */
	union g_addr *gateway;

	/*
	 * Information from the struct nexthop from which this nh was
	 * derived. 'recursive' is used for debug output only; 'type' is
	 * also consulted by the encoder when writing the gateway, and
	 * 'encap_info' drives the encap attributes that are emitted.
	 */
	int recursive;
	enum nexthop_types_t type;
	struct fpm_nh_encap_info_t encap_info;
};
164
/*
 * netlink_route_info
 *
 * A structure for holding information for a netlink route message.
 * Pointer members refer to data owned by the route and its nexthops;
 * nothing in here is allocated or freed by this file.
 */
struct netlink_route_info {
	/* Netlink message type, e.g. RTM_NEWROUTE or RTM_DELROUTE. */
	uint16_t nlmsg_type;
	/* Route type (RTN_UNICAST, RTN_BLACKHOLE, ...). */
	uint8_t rtm_type;
	/* Table id; sent as RTA_TABLE when it does not fit in rtm_table. */
	uint32_t rtm_table;
	uint8_t rtm_protocol;
	/* Address family of the route's prefix. */
	uint8_t af;
	struct prefix *prefix;
	/* Points at the route entry's metric; NULL when there is none. */
	uint32_t *metric;
	/* Number of valid entries in nhs[]. */
	unsigned int num_nhs;

	/*
	 * Nexthop structures
	 */
	struct netlink_nh_info nhs[MULTIPATH_NUM];
	/* Preferred source: first source address seen among added nexthops. */
	union g_addr *pref_src;
};
186
187 /*
188 * netlink_route_info_add_nh
189 *
190 * Add information about the given nexthop to the given route info
191 * structure.
192 *
193 * Returns true if a nexthop was added, false otherwise.
194 */
195 static int netlink_route_info_add_nh(struct netlink_route_info *ri,
196 struct nexthop *nexthop,
197 struct route_entry *re)
198 {
199 struct netlink_nh_info nhi;
200 union g_addr *src;
201 zebra_l3vni_t *zl3vni = NULL;
202
203 memset(&nhi, 0, sizeof(nhi));
204 src = NULL;
205
206 if (ri->num_nhs >= (int)array_size(ri->nhs))
207 return 0;
208
209 nhi.recursive = nexthop->rparent ? 1 : 0;
210 nhi.type = nexthop->type;
211 nhi.if_index = nexthop->ifindex;
212
213 if (nexthop->type == NEXTHOP_TYPE_IPV4
214 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
215 nhi.gateway = &nexthop->gate;
216 if (nexthop->src.ipv4.s_addr != INADDR_ANY)
217 src = &nexthop->src;
218 }
219
220 if (nexthop->type == NEXTHOP_TYPE_IPV6
221 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
222 nhi.gateway = &nexthop->gate;
223 }
224
225 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
226 if (nexthop->src.ipv4.s_addr != INADDR_ANY)
227 src = &nexthop->src;
228 }
229
230 if (!nhi.gateway && nhi.if_index == 0)
231 return 0;
232
233 if (re && CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) {
234 nhi.encap_info.encap_type = FPM_NH_ENCAP_VXLAN;
235
236 zl3vni = zl3vni_from_vrf(nexthop->vrf_id);
237 if (zl3vni && is_l3vni_oper_up(zl3vni)) {
238
239 /* Add VNI to VxLAN encap info */
240 nhi.encap_info.vxlan_encap.vni = zl3vni->vni;
241 }
242 }
243
244 /*
245 * We have a valid nhi. Copy the structure over to the route_info.
246 */
247 ri->nhs[ri->num_nhs] = nhi;
248 ri->num_nhs++;
249
250 if (src && !ri->pref_src)
251 ri->pref_src = src;
252
253 return 1;
254 }
255
256 /*
257 * netlink_proto_from_route_type
258 */
259 static uint8_t netlink_proto_from_route_type(int type)
260 {
261 switch (type) {
262 case ZEBRA_ROUTE_KERNEL:
263 case ZEBRA_ROUTE_CONNECT:
264 return RTPROT_KERNEL;
265
266 default:
267 return RTPROT_ZEBRA;
268 }
269 }
270
271 /*
272 * netlink_route_info_fill
273 *
274 * Fill out the route information object from the given route.
275 *
276 * Returns true on success and false on failure.
277 */
278 static int netlink_route_info_fill(struct netlink_route_info *ri, int cmd,
279 rib_dest_t *dest, struct route_entry *re)
280 {
281 struct nexthop *nexthop;
282
283 memset(ri, 0, sizeof(*ri));
284
285 ri->prefix = rib_dest_prefix(dest);
286 ri->af = rib_dest_af(dest);
287
288 ri->nlmsg_type = cmd;
289 ri->rtm_table = rib_table_info(rib_dest_table(dest))->table_id;
290 ri->rtm_protocol = RTPROT_UNSPEC;
291
292 /*
293 * An RTM_DELROUTE need not be accompanied by any nexthops,
294 * particularly in our communication with the FPM.
295 */
296 if (cmd == RTM_DELROUTE && !re)
297 return 1;
298
299 if (!re) {
300 zfpm_debug("%s: Expected non-NULL re pointer", __func__);
301 return 0;
302 }
303
304 ri->rtm_protocol = netlink_proto_from_route_type(re->type);
305 ri->rtm_type = RTN_UNICAST;
306 ri->metric = &re->metric;
307
308 for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) {
309 if (ri->num_nhs >= zrouter.multipath_num)
310 break;
311
312 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
313 continue;
314
315 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
316 switch (nexthop->bh_type) {
317 case BLACKHOLE_ADMINPROHIB:
318 ri->rtm_type = RTN_PROHIBIT;
319 break;
320 case BLACKHOLE_REJECT:
321 ri->rtm_type = RTN_UNREACHABLE;
322 break;
323 case BLACKHOLE_NULL:
324 default:
325 ri->rtm_type = RTN_BLACKHOLE;
326 break;
327 }
328 }
329
330 if ((cmd == RTM_NEWROUTE
331 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
332 || (cmd == RTM_DELROUTE
333 && CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED))) {
334 netlink_route_info_add_nh(ri, nexthop, re);
335 }
336 }
337
338 if (ri->num_nhs == 0) {
339 switch (ri->rtm_type) {
340 case RTN_PROHIBIT:
341 case RTN_UNREACHABLE:
342 case RTN_BLACKHOLE:
343 break;
344 default:
345 /* If there is no useful nexthop then return. */
346 zfpm_debug(
347 "netlink_encode_route(): No useful nexthop.");
348 return 0;
349 }
350 }
351
352 return 1;
353 }
354
355 /*
356 * netlink_route_info_encode
357 *
358 * Returns the number of bytes written to the buffer. 0 or a negative
359 * value indicates an error.
360 */
361 static int netlink_route_info_encode(struct netlink_route_info *ri,
362 char *in_buf, size_t in_buf_len)
363 {
364 size_t bytelen;
365 unsigned int nexthop_num = 0;
366 size_t buf_offset;
367 struct netlink_nh_info *nhi;
368 enum fpm_nh_encap_type_t encap;
369 struct rtattr *nest, *inner_nest;
370 struct rtnexthop *rtnh;
371 struct vxlan_encap_info_t *vxlan;
372 struct in6_addr ipv6;
373
374 struct {
375 struct nlmsghdr n;
376 struct rtmsg r;
377 char buf[1];
378 } * req;
379
380 req = (void *)in_buf;
381
382 buf_offset = ((char *)req->buf) - ((char *)req);
383
384 if (in_buf_len < buf_offset) {
385 assert(0);
386 return 0;
387 }
388
389 memset(req, 0, buf_offset);
390
391 bytelen = af_addr_size(ri->af);
392
393 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
394 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
395 req->n.nlmsg_type = ri->nlmsg_type;
396 req->r.rtm_family = ri->af;
397
398 /*
399 * rtm_table field is a uchar field which can accomodate table_id less
400 * than 256.
401 * To support table id greater than 255, if the table_id is greater than
402 * 255, set rtm_table to RT_TABLE_UNSPEC and add RTA_TABLE attribute
403 * with 32 bit value as the table_id.
404 */
405 if (ri->rtm_table < 256)
406 req->r.rtm_table = ri->rtm_table;
407 else {
408 req->r.rtm_table = RT_TABLE_UNSPEC;
409 nl_attr_put32(&req->n, in_buf_len, RTA_TABLE, ri->rtm_table);
410 }
411
412 req->r.rtm_dst_len = ri->prefix->prefixlen;
413 req->r.rtm_protocol = ri->rtm_protocol;
414 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
415
416 nl_attr_put(&req->n, in_buf_len, RTA_DST, &ri->prefix->u.prefix,
417 bytelen);
418
419 req->r.rtm_type = ri->rtm_type;
420
421 /* Metric. */
422 if (ri->metric)
423 nl_attr_put32(&req->n, in_buf_len, RTA_PRIORITY, *ri->metric);
424
425 if (ri->num_nhs == 0)
426 goto done;
427
428 if (ri->num_nhs == 1) {
429 nhi = &ri->nhs[0];
430
431 if (nhi->gateway) {
432 if (nhi->type == NEXTHOP_TYPE_IPV4_IFINDEX
433 && ri->af == AF_INET6) {
434 ipv4_to_ipv4_mapped_ipv6(&ipv6,
435 nhi->gateway->ipv4);
436 nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
437 &ipv6, bytelen);
438 } else
439 nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
440 nhi->gateway, bytelen);
441 }
442
443 if (nhi->if_index) {
444 nl_attr_put32(&req->n, in_buf_len, RTA_OIF,
445 nhi->if_index);
446 }
447
448 encap = nhi->encap_info.encap_type;
449 switch (encap) {
450 case FPM_NH_ENCAP_NONE:
451 case FPM_NH_ENCAP_MAX:
452 break;
453 case FPM_NH_ENCAP_VXLAN:
454 nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE,
455 encap);
456 vxlan = &nhi->encap_info.vxlan_encap;
457 nest = nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP);
458 nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI,
459 vxlan->vni);
460 nl_attr_nest_end(&req->n, nest);
461 break;
462 }
463
464 goto done;
465 }
466
467 /*
468 * Multipath case.
469 */
470 nest = nl_attr_nest(&req->n, in_buf_len, RTA_MULTIPATH);
471
472 for (nexthop_num = 0; nexthop_num < ri->num_nhs; nexthop_num++) {
473 rtnh = nl_attr_rtnh(&req->n, in_buf_len);
474 nhi = &ri->nhs[nexthop_num];
475
476 if (nhi->gateway)
477 nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
478 nhi->gateway, bytelen);
479
480 if (nhi->if_index) {
481 rtnh->rtnh_ifindex = nhi->if_index;
482 }
483
484 encap = nhi->encap_info.encap_type;
485 switch (encap) {
486 case FPM_NH_ENCAP_NONE:
487 case FPM_NH_ENCAP_MAX:
488 break;
489 case FPM_NH_ENCAP_VXLAN:
490 nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE,
491 encap);
492 vxlan = &nhi->encap_info.vxlan_encap;
493 inner_nest =
494 nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP);
495 nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI,
496 vxlan->vni);
497 nl_attr_nest_end(&req->n, inner_nest);
498 break;
499 }
500
501 nl_attr_rtnh_end(&req->n, rtnh);
502 }
503
504 nl_attr_nest_end(&req->n, nest);
505 assert(nest->rta_len > RTA_LENGTH(0));
506
507 done:
508
509 if (ri->pref_src) {
510 nl_attr_put(&req->n, in_buf_len, RTA_PREFSRC, &ri->pref_src,
511 bytelen);
512 }
513
514 assert(req->n.nlmsg_len < in_buf_len);
515 return req->n.nlmsg_len;
516 }
517
518 /*
519 * zfpm_log_route_info
520 *
521 * Helper function to log the information in a route_info structure.
522 */
523 static void zfpm_log_route_info(struct netlink_route_info *ri,
524 const char *label)
525 {
526 struct netlink_nh_info *nhi;
527 unsigned int i;
528
529 zfpm_debug("%s : %s %s/%d, Proto: %s, Metric: %u", label,
530 nl_msg_type_to_str(ri->nlmsg_type),
531 prefix_addr_to_a(ri->prefix), ri->prefix->prefixlen,
532 nl_rtproto_to_str(ri->rtm_protocol),
533 ri->metric ? *ri->metric : 0);
534
535 for (i = 0; i < ri->num_nhs; i++) {
536 nhi = &ri->nhs[i];
537 zfpm_debug(" Intf: %u, Gateway: %s, Recursive: %s, Type: %s, Encap type: %s",
538 nhi->if_index, addr_to_a(ri->af, nhi->gateway),
539 nhi->recursive ? "yes" : "no",
540 nexthop_type_to_str(nhi->type),
541 fpm_nh_encap_type_to_str(nhi->encap_info.encap_type)
542 );
543 }
544 }
545
546 /*
547 * zfpm_netlink_encode_route
548 *
549 * Create a netlink message corresponding to the given route in the
550 * given buffer space.
551 *
552 * Returns the number of bytes written to the buffer. 0 or a negative
553 * value indicates an error.
554 */
555 int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, struct route_entry *re,
556 char *in_buf, size_t in_buf_len)
557 {
558 struct netlink_route_info ri_space, *ri;
559
560 ri = &ri_space;
561
562 if (!netlink_route_info_fill(ri, cmd, dest, re))
563 return 0;
564
565 zfpm_log_route_info(ri, __func__);
566
567 return netlink_route_info_encode(ri, in_buf, in_buf_len);
568 }
569
570 /*
571 * zfpm_netlink_encode_mac
572 *
573 * Create a netlink message corresponding to the given MAC.
574 *
575 * Returns the number of bytes written to the buffer. 0 or a negative
576 * value indicates an error.
577 */
578 int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
579 size_t in_buf_len)
580 {
581 char buf1[ETHER_ADDR_STRLEN];
582 size_t buf_offset;
583
584 struct macmsg {
585 struct nlmsghdr hdr;
586 struct ndmsg ndm;
587 char buf[0];
588 } *req;
589 req = (void *)in_buf;
590
591 buf_offset = offsetof(struct macmsg, buf);
592 if (in_buf_len < buf_offset)
593 return 0;
594 memset(req, 0, buf_offset);
595
596 /* Construct nlmsg header */
597 req->hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
598 req->hdr.nlmsg_type = CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) ?
599 RTM_DELNEIGH : RTM_NEWNEIGH;
600 req->hdr.nlmsg_flags = NLM_F_REQUEST;
601 if (req->hdr.nlmsg_type == RTM_NEWNEIGH)
602 req->hdr.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
603
604 /* Construct ndmsg */
605 req->ndm.ndm_family = AF_BRIDGE;
606 req->ndm.ndm_ifindex = mac->vxlan_if;
607
608 req->ndm.ndm_state = NUD_REACHABLE;
609 req->ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
610 if (CHECK_FLAG(mac->zebra_flags,
611 (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)))
612 req->ndm.ndm_state |= NUD_NOARP;
613 else
614 req->ndm.ndm_flags |= NTF_EXT_LEARNED;
615
616 /* Add attributes */
617 nl_attr_put(&req->hdr, in_buf_len, NDA_LLADDR, &mac->macaddr, 6);
618 nl_attr_put(&req->hdr, in_buf_len, NDA_DST, &mac->r_vtep_ip, 4);
619 nl_attr_put32(&req->hdr, in_buf_len, NDA_MASTER, mac->svi_if);
620 nl_attr_put32(&req->hdr, in_buf_len, NDA_VNI, mac->vni);
621
622 assert(req->hdr.nlmsg_len < in_buf_len);
623
624 zfpm_debug("Tx %s family %s ifindex %u MAC %s DEST %s",
625 nl_msg_type_to_str(req->hdr.nlmsg_type),
626 nl_family_to_str(req->ndm.ndm_family), req->ndm.ndm_ifindex,
627 prefix_mac2str(&mac->macaddr, buf1, sizeof(buf1)),
628 inet_ntoa(mac->r_vtep_ip));
629
630 return req->hdr.nlmsg_len;
631 }
632
633 #endif /* HAVE_NETLINK */