]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm_netlink.c
Merge pull request #5706 from mjstapp/fix_nh_debug_show
[mirror_frr.git] / zebra / zebra_fpm_netlink.c
1 /*
2 * Code for encoding/decoding FPM messages that are in netlink format.
3 *
4 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
5 * Copyright (C) 2012 by Open Source Routing.
6 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
7 *
8 * This file is part of GNU Zebra.
9 *
10 * GNU Zebra is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2, or (at your option) any
13 * later version.
14 *
15 * GNU Zebra is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; see the file COPYING; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <zebra.h>
26
27 #ifdef HAVE_NETLINK
28
29 #include "log.h"
30 #include "rib.h"
31 #include "vty.h"
32 #include "prefix.h"
33
34 #include "zebra/zserv.h"
35 #include "zebra/zebra_router.h"
36 #include "zebra/zebra_dplane.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/kernel_netlink.h"
40 #include "zebra/rt_netlink.h"
41 #include "nexthop.h"
42
43 #include "zebra/zebra_fpm_private.h"
44 #include "zebra/zebra_vxlan_private.h"
45
46 /*
47 * addr_to_a
48 *
49 * Returns string representation of an address of the given AF.
50 */
51 static inline const char *addr_to_a(uint8_t af, void *addr)
52 {
53 if (!addr)
54 return "<No address>";
55
56 switch (af) {
57
58 case AF_INET:
59 return inet_ntoa(*((struct in_addr *)addr));
60 break;
61 case AF_INET6:
62 return inet6_ntoa(*((struct in6_addr *)addr));
63 break;
64 default:
65 return "<Addr in unknown AF>";
66 break;
67 }
68 }
69
70 /*
71 * prefix_addr_to_a
72 *
73 * Convience wrapper that returns a human-readable string for the
74 * address in a prefix.
75 */
76 static const char *prefix_addr_to_a(struct prefix *prefix)
77 {
78 if (!prefix)
79 return "<No address>";
80
81 return addr_to_a(prefix->family, &prefix->u.prefix);
82 }
83
/*
 * af_addr_size
 *
 * Number of bytes in an address of the given address family:
 * 4 for AF_INET, 16 for AF_INET6.
 *
 * Any other family is a programming error and trips an assertion; if
 * assertions are compiled out, 16 is returned.
 */
static size_t af_addr_size(uint8_t af)
{
	if (af == AF_INET)
		return 4;

	if (af != AF_INET6)
		assert(0);

	return 16;
}
104
/*
 * fpm_nh_encap_type_t
 *
 * Nexthop encapsulation types, sent in the RTA_ENCAP_TYPE attribute.
 *
 * RTA_ENCAP_TYPE is reused here for VxLAN encap. Values 0 to 8 of that
 * attribute are currently used by lwtunnel_encap_types, so the VxLAN
 * value must not fall in that range.
 */
enum fpm_nh_encap_type_t {
	/* Nexthop carries no encapsulation. */
	FPM_NH_ENCAP_NONE = 0,
	/* Nexthop is reached through a VxLAN tunnel. */
	FPM_NH_ENCAP_VXLAN = 100,
	/* End marker; not a valid encap type. */
	FPM_NH_ENCAP_MAX,
};
115
116 /*
117 * fpm_nh_encap_type_to_str
118 */
119 static const char *fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type)
120 {
121 switch (encap_type) {
122 case FPM_NH_ENCAP_NONE:
123 return "none";
124
125 case FPM_NH_ENCAP_VXLAN:
126 return "VxLAN";
127
128 case FPM_NH_ENCAP_MAX:
129 return "invalid";
130 }
131
132 return "invalid";
133 }
134
135 struct vxlan_encap_info_t {
136 vni_t vni;
137 };
138
/*
 * vxlan_encap_info_type_t
 *
 * Attribute types used inside the RTA_ENCAP nest when the encap type is
 * VxLAN.
 */
enum vxlan_encap_info_type_t {
	/* 32-bit VNI. */
	VXLAN_VNI = 0,
};
142
143 struct fpm_nh_encap_info_t {
144 enum fpm_nh_encap_type_t encap_type;
145 union {
146 struct vxlan_encap_info_t vxlan_encap;
147 };
148 };
149
150 /*
151 * netlink_nh_info_t
152 *
153 * Holds information about a single nexthop for netlink. These info
154 * structures are transient and may contain pointers into rib
155 * data structures for convenience.
156 */
157 typedef struct netlink_nh_info_t_ {
158 uint32_t if_index;
159 union g_addr *gateway;
160
161 /*
162 * Information from the struct nexthop from which this nh was
163 * derived. For debug purposes only.
164 */
165 int recursive;
166 enum nexthop_types_t type;
167 struct fpm_nh_encap_info_t encap_info;
168 } netlink_nh_info_t;
169
170 /*
171 * netlink_route_info_t
172 *
173 * A structure for holding information for a netlink route message.
174 */
175 typedef struct netlink_route_info_t_ {
176 uint16_t nlmsg_type;
177 uint8_t rtm_type;
178 uint32_t rtm_table;
179 uint8_t rtm_protocol;
180 uint8_t af;
181 struct prefix *prefix;
182 uint32_t *metric;
183 unsigned int num_nhs;
184
185 /*
186 * Nexthop structures
187 */
188 netlink_nh_info_t nhs[MULTIPATH_NUM];
189 union g_addr *pref_src;
190 } netlink_route_info_t;
191
192 /*
193 * netlink_route_info_add_nh
194 *
195 * Add information about the given nexthop to the given route info
196 * structure.
197 *
198 * Returns true if a nexthop was added, false otherwise.
199 */
200 static int netlink_route_info_add_nh(netlink_route_info_t *ri,
201 struct nexthop *nexthop,
202 struct route_entry *re)
203 {
204 netlink_nh_info_t nhi;
205 union g_addr *src;
206 zebra_l3vni_t *zl3vni = NULL;
207
208 memset(&nhi, 0, sizeof(nhi));
209 src = NULL;
210
211 if (ri->num_nhs >= (int)array_size(ri->nhs))
212 return 0;
213
214 nhi.recursive = nexthop->rparent ? 1 : 0;
215 nhi.type = nexthop->type;
216 nhi.if_index = nexthop->ifindex;
217
218 if (nexthop->type == NEXTHOP_TYPE_IPV4
219 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
220 nhi.gateway = &nexthop->gate;
221 if (nexthop->src.ipv4.s_addr)
222 src = &nexthop->src;
223 }
224
225 if (nexthop->type == NEXTHOP_TYPE_IPV6
226 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
227 nhi.gateway = &nexthop->gate;
228 }
229
230 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
231 if (nexthop->src.ipv4.s_addr)
232 src = &nexthop->src;
233 }
234
235 if (!nhi.gateway && nhi.if_index == 0)
236 return 0;
237
238 if (re && CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) {
239 nhi.encap_info.encap_type = FPM_NH_ENCAP_VXLAN;
240
241 zl3vni = zl3vni_from_vrf(nexthop->vrf_id);
242 if (zl3vni && is_l3vni_oper_up(zl3vni)) {
243
244 /* Add VNI to VxLAN encap info */
245 nhi.encap_info.vxlan_encap.vni = zl3vni->vni;
246 }
247 }
248
249 /*
250 * We have a valid nhi. Copy the structure over to the route_info.
251 */
252 ri->nhs[ri->num_nhs] = nhi;
253 ri->num_nhs++;
254
255 if (src && !ri->pref_src)
256 ri->pref_src = src;
257
258 return 1;
259 }
260
261 /*
262 * netlink_proto_from_route_type
263 */
264 static uint8_t netlink_proto_from_route_type(int type)
265 {
266 switch (type) {
267 case ZEBRA_ROUTE_KERNEL:
268 case ZEBRA_ROUTE_CONNECT:
269 return RTPROT_KERNEL;
270
271 default:
272 return RTPROT_ZEBRA;
273 }
274 }
275
276 /*
277 * netlink_route_info_fill
278 *
279 * Fill out the route information object from the given route.
280 *
281 * Returns true on success and false on failure.
282 */
283 static int netlink_route_info_fill(netlink_route_info_t *ri, int cmd,
284 rib_dest_t *dest, struct route_entry *re)
285 {
286 struct nexthop *nexthop;
287 struct zebra_vrf *zvrf;
288
289 memset(ri, 0, sizeof(*ri));
290
291 ri->prefix = rib_dest_prefix(dest);
292 ri->af = rib_dest_af(dest);
293
294 ri->nlmsg_type = cmd;
295 zvrf = rib_dest_vrf(dest);
296 if (zvrf)
297 ri->rtm_table = zvrf->table_id;
298 ri->rtm_protocol = RTPROT_UNSPEC;
299
300 /*
301 * An RTM_DELROUTE need not be accompanied by any nexthops,
302 * particularly in our communication with the FPM.
303 */
304 if (cmd == RTM_DELROUTE && !re)
305 return 1;
306
307 if (!re) {
308 zfpm_debug("%s: Expected non-NULL re pointer",
309 __PRETTY_FUNCTION__);
310 return 0;
311 }
312
313 ri->rtm_protocol = netlink_proto_from_route_type(re->type);
314 ri->rtm_type = RTN_UNICAST;
315 ri->metric = &re->metric;
316
317 for (ALL_NEXTHOPS_PTR(re->nhe->nhg, nexthop)) {
318 if (ri->num_nhs >= zrouter.multipath_num)
319 break;
320
321 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
322 continue;
323
324 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
325 switch (nexthop->bh_type) {
326 case BLACKHOLE_ADMINPROHIB:
327 ri->rtm_type = RTN_PROHIBIT;
328 break;
329 case BLACKHOLE_REJECT:
330 ri->rtm_type = RTN_UNREACHABLE;
331 break;
332 case BLACKHOLE_NULL:
333 default:
334 ri->rtm_type = RTN_BLACKHOLE;
335 break;
336 }
337 }
338
339 if ((cmd == RTM_NEWROUTE
340 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
341 || (cmd == RTM_DELROUTE
342 && CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED))) {
343 netlink_route_info_add_nh(ri, nexthop, re);
344 }
345 }
346
347 /* If there is no useful nexthop then return. */
348 if (ri->num_nhs == 0) {
349 zfpm_debug("netlink_encode_route(): No useful nexthop.");
350 return 0;
351 }
352
353 return 1;
354 }
355
356 /*
357 * netlink_route_info_encode
358 *
359 * Returns the number of bytes written to the buffer. 0 or a negative
360 * value indicates an error.
361 */
362 static int netlink_route_info_encode(netlink_route_info_t *ri, char *in_buf,
363 size_t in_buf_len)
364 {
365 size_t bytelen;
366 unsigned int nexthop_num = 0;
367 size_t buf_offset;
368 netlink_nh_info_t *nhi;
369 enum fpm_nh_encap_type_t encap;
370 struct rtattr *nest;
371 struct vxlan_encap_info_t *vxlan;
372 int nest_len;
373
374 struct {
375 struct nlmsghdr n;
376 struct rtmsg r;
377 char buf[1];
378 } * req;
379
380 req = (void *)in_buf;
381
382 buf_offset = ((char *)req->buf) - ((char *)req);
383
384 if (in_buf_len < buf_offset) {
385 assert(0);
386 return 0;
387 }
388
389 memset(req, 0, buf_offset);
390
391 bytelen = af_addr_size(ri->af);
392
393 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
394 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
395 req->n.nlmsg_type = ri->nlmsg_type;
396 req->r.rtm_family = ri->af;
397
398 /*
399 * rtm_table field is a uchar field which can accomodate table_id less
400 * than 256.
401 * To support table id greater than 255, if the table_id is greater than
402 * 255, set rtm_table to RT_TABLE_UNSPEC and add RTA_TABLE attribute
403 * with 32 bit value as the table_id.
404 */
405 if (ri->rtm_table < 256)
406 req->r.rtm_table = ri->rtm_table;
407 else {
408 req->r.rtm_table = RT_TABLE_UNSPEC;
409 addattr32(&req->n, in_buf_len, RTA_TABLE, ri->rtm_table);
410 }
411
412 req->r.rtm_dst_len = ri->prefix->prefixlen;
413 req->r.rtm_protocol = ri->rtm_protocol;
414 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
415
416 addattr_l(&req->n, in_buf_len, RTA_DST, &ri->prefix->u.prefix, bytelen);
417
418 req->r.rtm_type = ri->rtm_type;
419
420 /* Metric. */
421 if (ri->metric)
422 addattr32(&req->n, in_buf_len, RTA_PRIORITY, *ri->metric);
423
424 if (ri->num_nhs == 0)
425 goto done;
426
427 if (ri->num_nhs == 1) {
428 nhi = &ri->nhs[0];
429
430 if (nhi->gateway) {
431 addattr_l(&req->n, in_buf_len, RTA_GATEWAY,
432 nhi->gateway, bytelen);
433 }
434
435 if (nhi->if_index) {
436 addattr32(&req->n, in_buf_len, RTA_OIF, nhi->if_index);
437 }
438
439 encap = nhi->encap_info.encap_type;
440 if (encap > FPM_NH_ENCAP_NONE) {
441 addattr_l(&req->n, in_buf_len, RTA_ENCAP_TYPE, &encap,
442 sizeof(uint16_t));
443 switch (encap) {
444 case FPM_NH_ENCAP_NONE:
445 break;
446 case FPM_NH_ENCAP_VXLAN:
447 vxlan = &nhi->encap_info.vxlan_encap;
448 nest = addattr_nest(&req->n, in_buf_len,
449 RTA_ENCAP);
450 addattr32(&req->n, in_buf_len, VXLAN_VNI,
451 vxlan->vni);
452 addattr_nest_end(&req->n, nest);
453 break;
454 case FPM_NH_ENCAP_MAX:
455 break;
456 }
457 }
458
459 goto done;
460 }
461
462 /*
463 * Multipath case.
464 */
465 char buf[NL_PKT_BUF_SIZE];
466 struct rtattr *rta = (void *)buf;
467 struct rtnexthop *rtnh;
468
469 rta->rta_type = RTA_MULTIPATH;
470 rta->rta_len = RTA_LENGTH(0);
471 rtnh = RTA_DATA(rta);
472
473 for (nexthop_num = 0; nexthop_num < ri->num_nhs; nexthop_num++) {
474 nhi = &ri->nhs[nexthop_num];
475
476 rtnh->rtnh_len = sizeof(*rtnh);
477 rtnh->rtnh_flags = 0;
478 rtnh->rtnh_hops = 0;
479 rtnh->rtnh_ifindex = 0;
480 rta->rta_len += rtnh->rtnh_len;
481
482 if (nhi->gateway) {
483 rta_addattr_l(rta, sizeof(buf), RTA_GATEWAY,
484 nhi->gateway, bytelen);
485 rtnh->rtnh_len += sizeof(struct rtattr) + bytelen;
486 }
487
488 if (nhi->if_index) {
489 rtnh->rtnh_ifindex = nhi->if_index;
490 }
491
492 encap = nhi->encap_info.encap_type;
493 if (encap > FPM_NH_ENCAP_NONE) {
494 rta_addattr_l(rta, sizeof(buf), RTA_ENCAP_TYPE,
495 &encap, sizeof(uint16_t));
496 rtnh->rtnh_len += sizeof(struct rtattr) +
497 sizeof(uint16_t);
498 switch (encap) {
499 case FPM_NH_ENCAP_NONE:
500 break;
501 case FPM_NH_ENCAP_VXLAN:
502 vxlan = &nhi->encap_info.vxlan_encap;
503 nest = rta_nest(rta, sizeof(buf), RTA_ENCAP);
504 rta_addattr_l(rta, sizeof(buf), VXLAN_VNI,
505 &vxlan->vni, sizeof(uint32_t));
506 nest_len = rta_nest_end(rta, nest);
507 rtnh->rtnh_len += nest_len;
508 break;
509 case FPM_NH_ENCAP_MAX:
510 break;
511 }
512 }
513
514 rtnh = RTNH_NEXT(rtnh);
515 }
516
517 assert(rta->rta_len > RTA_LENGTH(0));
518 addattr_l(&req->n, in_buf_len, RTA_MULTIPATH, RTA_DATA(rta),
519 RTA_PAYLOAD(rta));
520
521 done:
522
523 if (ri->pref_src) {
524 addattr_l(&req->n, in_buf_len, RTA_PREFSRC, &ri->pref_src,
525 bytelen);
526 }
527
528 assert(req->n.nlmsg_len < in_buf_len);
529 return req->n.nlmsg_len;
530 }
531
532 /*
533 * zfpm_log_route_info
534 *
535 * Helper function to log the information in a route_info structure.
536 */
537 static void zfpm_log_route_info(netlink_route_info_t *ri, const char *label)
538 {
539 netlink_nh_info_t *nhi;
540 unsigned int i;
541
542 zfpm_debug("%s : %s %s/%d, Proto: %s, Metric: %u", label,
543 nl_msg_type_to_str(ri->nlmsg_type),
544 prefix_addr_to_a(ri->prefix), ri->prefix->prefixlen,
545 nl_rtproto_to_str(ri->rtm_protocol),
546 ri->metric ? *ri->metric : 0);
547
548 for (i = 0; i < ri->num_nhs; i++) {
549 nhi = &ri->nhs[i];
550 zfpm_debug(" Intf: %u, Gateway: %s, Recursive: %s, Type: %s, Encap type: %s",
551 nhi->if_index, addr_to_a(ri->af, nhi->gateway),
552 nhi->recursive ? "yes" : "no",
553 nexthop_type_to_str(nhi->type),
554 fpm_nh_encap_type_to_str(nhi->encap_info.encap_type)
555 );
556 }
557 }
558
559 /*
560 * zfpm_netlink_encode_route
561 *
562 * Create a netlink message corresponding to the given route in the
563 * given buffer space.
564 *
565 * Returns the number of bytes written to the buffer. 0 or a negative
566 * value indicates an error.
567 */
568 int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, struct route_entry *re,
569 char *in_buf, size_t in_buf_len)
570 {
571 netlink_route_info_t ri_space, *ri;
572
573 ri = &ri_space;
574
575 if (!netlink_route_info_fill(ri, cmd, dest, re))
576 return 0;
577
578 zfpm_log_route_info(ri, __FUNCTION__);
579
580 return netlink_route_info_encode(ri, in_buf, in_buf_len);
581 }
582
583 /*
584 * zfpm_netlink_encode_mac
585 *
586 * Create a netlink message corresponding to the given MAC.
587 *
588 * Returns the number of bytes written to the buffer. 0 or a negative
589 * value indicates an error.
590 */
591 int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
592 size_t in_buf_len)
593 {
594 char buf1[ETHER_ADDR_STRLEN];
595 size_t buf_offset;
596
597 struct macmsg {
598 struct nlmsghdr hdr;
599 struct ndmsg ndm;
600 char buf[0];
601 } *req;
602 req = (void *)in_buf;
603
604 buf_offset = offsetof(struct macmsg, buf);
605 if (in_buf_len < buf_offset)
606 return 0;
607 memset(req, 0, buf_offset);
608
609 /* Construct nlmsg header */
610 req->hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
611 req->hdr.nlmsg_type = CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) ?
612 RTM_DELNEIGH : RTM_NEWNEIGH;
613 req->hdr.nlmsg_flags = NLM_F_REQUEST;
614 if (req->hdr.nlmsg_type == RTM_NEWNEIGH)
615 req->hdr.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
616
617 /* Construct ndmsg */
618 req->ndm.ndm_family = AF_BRIDGE;
619 req->ndm.ndm_ifindex = mac->vxlan_if;
620
621 req->ndm.ndm_state = NUD_REACHABLE;
622 req->ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
623 if (CHECK_FLAG(mac->zebra_flags,
624 (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)))
625 req->ndm.ndm_state |= NUD_NOARP;
626 else
627 req->ndm.ndm_flags |= NTF_EXT_LEARNED;
628
629 /* Add attributes */
630 addattr_l(&req->hdr, in_buf_len, NDA_LLADDR, &mac->macaddr, 6);
631 addattr_l(&req->hdr, in_buf_len, NDA_DST, &mac->r_vtep_ip, 4);
632 addattr32(&req->hdr, in_buf_len, NDA_MASTER, mac->svi_if);
633 addattr32(&req->hdr, in_buf_len, NDA_VNI, mac->vni);
634
635 assert(req->hdr.nlmsg_len < in_buf_len);
636
637 zfpm_debug("Tx %s family %s ifindex %u MAC %s DEST %s",
638 nl_msg_type_to_str(req->hdr.nlmsg_type),
639 nl_family_to_str(req->ndm.ndm_family), req->ndm.ndm_ifindex,
640 prefix_mac2str(&mac->macaddr, buf1, sizeof(buf1)),
641 inet_ntoa(mac->r_vtep_ip));
642
643 return req->hdr.nlmsg_len;
644 }
645
646 #endif /* HAVE_NETLINK */