]> git.proxmox.com Git - mirror_frr.git/blame - zebra/zebra_nhg.c
zebra: nht resolution default configurable per vrf
[mirror_frr.git] / zebra / zebra_nhg.c
CommitLineData
ad28e79a
SW
1/* Zebra Nexthop Group Code.
2 * Copyright (C) 2019 Cumulus Networks, Inc.
3 * Donald Sharp
4 * Stephen Worley
5 *
6 * This file is part of FRR.
7 *
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23#include <zebra.h>
24
25#include "lib/nexthop.h"
50d89650 26#include "lib/nexthop_group_private.h"
ad28e79a
SW
27#include "lib/routemap.h"
28
29#include "zebra/connected.h"
30#include "zebra/debug.h"
31#include "zebra/zebra_router.h"
32#include "zebra/zebra_nhg.h"
33#include "zebra/zebra_rnh.h"
34#include "zebra/zebra_routemap.h"
35#include "zebra/rt.h"
36
37static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop,
38 struct nexthop *nexthop)
39{
40 struct nexthop *resolved_hop;
41
42 resolved_hop = nexthop_new();
43 SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);
44
45 resolved_hop->vrf_id = nexthop->vrf_id;
46 switch (newhop->type) {
47 case NEXTHOP_TYPE_IPV4:
48 case NEXTHOP_TYPE_IPV4_IFINDEX:
49 /* If the resolving route specifies a gateway, use it */
50 resolved_hop->type = newhop->type;
51 resolved_hop->gate.ipv4 = newhop->gate.ipv4;
52
53 if (newhop->ifindex) {
54 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
55 resolved_hop->ifindex = newhop->ifindex;
56 }
57 break;
58 case NEXTHOP_TYPE_IPV6:
59 case NEXTHOP_TYPE_IPV6_IFINDEX:
60 resolved_hop->type = newhop->type;
61 resolved_hop->gate.ipv6 = newhop->gate.ipv6;
62
63 if (newhop->ifindex) {
64 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
65 resolved_hop->ifindex = newhop->ifindex;
66 }
67 break;
68 case NEXTHOP_TYPE_IFINDEX:
69 /* If the resolving route is an interface route,
70 * it means the gateway we are looking up is connected
71 * to that interface. (The actual network is _not_ onlink).
72 * Therefore, the resolved route should have the original
73 * gateway as nexthop as it is directly connected.
74 *
75 * On Linux, we have to set the onlink netlink flag because
76 * otherwise, the kernel won't accept the route.
77 */
78 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
79 if (afi == AFI_IP) {
80 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
81 resolved_hop->gate.ipv4 = nexthop->gate.ipv4;
82 } else if (afi == AFI_IP6) {
83 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
84 resolved_hop->gate.ipv6 = nexthop->gate.ipv6;
85 }
86 resolved_hop->ifindex = newhop->ifindex;
87 break;
88 case NEXTHOP_TYPE_BLACKHOLE:
89 resolved_hop->type = NEXTHOP_TYPE_BLACKHOLE;
2dc359a6 90 resolved_hop->bh_type = newhop->bh_type;
ad28e79a
SW
91 break;
92 }
93
94 if (newhop->flags & NEXTHOP_FLAG_ONLINK)
95 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
96
97 /* Copy labels of the resolved route */
98 if (newhop->nh_label)
99 nexthop_add_labels(resolved_hop, newhop->nh_label_type,
100 newhop->nh_label->num_labels,
101 &newhop->nh_label->label[0]);
102
103 resolved_hop->rparent = nexthop;
50d89650 104 _nexthop_add(&nexthop->resolved, resolved_hop);
ad28e79a
SW
105}
106
107/*
108 * Given a nexthop we need to properly recursively resolve
109 * the route. As such, do a table lookup to find and match
110 * if at all possible. Set the nexthop->ifindex as appropriate
111 */
112static int nexthop_active(afi_t afi, struct route_entry *re,
113 struct nexthop *nexthop, struct route_node *top)
114{
115 struct prefix p;
116 struct route_table *table;
117 struct route_node *rn;
118 struct route_entry *match = NULL;
119 int resolved;
120 struct nexthop *newhop;
121 struct interface *ifp;
122 rib_dest_t *dest;
5a0bdc78 123 struct zebra_vrf *zvrf;
ad28e79a
SW
124
125 if ((nexthop->type == NEXTHOP_TYPE_IPV4)
126 || nexthop->type == NEXTHOP_TYPE_IPV6)
127 nexthop->ifindex = 0;
128
129 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
130 nexthops_free(nexthop->resolved);
131 nexthop->resolved = NULL;
132 re->nexthop_mtu = 0;
133
134 /*
135 * If the kernel has sent us a route, then
136 * by golly gee whiz it's a good route.
137 */
138 if (re->type == ZEBRA_ROUTE_KERNEL || re->type == ZEBRA_ROUTE_SYSTEM)
139 return 1;
140
141 /*
142 * Check to see if we should trust the passed in information
143 * for UNNUMBERED interfaces as that we won't find the GW
144 * address in the routing table.
145 * This check should suffice to handle IPv4 or IPv6 routes
146 * sourced from EVPN routes which are installed with the
147 * next hop as the remote VTEP IP.
148 */
149 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) {
150 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
151 if (!ifp) {
152 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
153 zlog_debug(
154 "\t%s: Onlink and interface: %u[%u] does not exist",
155 __PRETTY_FUNCTION__, nexthop->ifindex,
156 nexthop->vrf_id);
157 return 0;
158 }
159 if (connected_is_unnumbered(ifp)) {
160 if (if_is_operative(ifp))
161 return 1;
162 else {
163 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
164 zlog_debug(
165 "\t%s: Onlink and interface %s is not operative",
166 __PRETTY_FUNCTION__, ifp->name);
167 return 0;
168 }
169 }
170 if (!if_is_operative(ifp)) {
171 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
172 zlog_debug(
173 "\t%s: Interface %s is not unnumbered",
174 __PRETTY_FUNCTION__, ifp->name);
175 return 0;
176 }
177 }
178
179 /* Make lookup prefix. */
180 memset(&p, 0, sizeof(struct prefix));
181 switch (afi) {
182 case AFI_IP:
183 p.family = AF_INET;
184 p.prefixlen = IPV4_MAX_PREFIXLEN;
185 p.u.prefix4 = nexthop->gate.ipv4;
186 break;
187 case AFI_IP6:
188 p.family = AF_INET6;
189 p.prefixlen = IPV6_MAX_PREFIXLEN;
190 p.u.prefix6 = nexthop->gate.ipv6;
191 break;
192 default:
193 assert(afi != AFI_IP && afi != AFI_IP6);
194 break;
195 }
196 /* Lookup table. */
197 table = zebra_vrf_table(afi, SAFI_UNICAST, nexthop->vrf_id);
5a0bdc78
PG
198 /* get zvrf */
199 zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
200 if (!table || !zvrf) {
ad28e79a
SW
201 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
202 zlog_debug("\t%s: Table not found",
203 __PRETTY_FUNCTION__);
204 return 0;
205 }
206
207 rn = route_node_match(table, (struct prefix *)&p);
208 while (rn) {
209 route_unlock_node(rn);
210
211 /* Lookup should halt if we've matched against ourselves ('top',
212 * if specified) - i.e., we cannot have a nexthop NH1 is
213 * resolved by a route NH1. The exception is if the route is a
214 * host route.
215 */
216 if (top && rn == top)
217 if (((afi == AFI_IP) && (rn->p.prefixlen != 32))
218 || ((afi == AFI_IP6) && (rn->p.prefixlen != 128))) {
219 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
220 zlog_debug(
221 "\t%s: Matched against ourself and prefix length is not max bit length",
222 __PRETTY_FUNCTION__);
223 return 0;
224 }
225
226 /* Pick up selected route. */
227 /* However, do not resolve over default route unless explicitly
228 * allowed. */
229 if (is_default_prefix(&rn->p)
5a0bdc78 230 && !rnh_resolve_via_default(zvrf, p.family)) {
ad28e79a
SW
231 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
232 zlog_debug(
233 "\t:%s: Resolved against default route",
234 __PRETTY_FUNCTION__);
235 return 0;
236 }
237
238 dest = rib_dest_from_rnode(rn);
239 if (dest && dest->selected_fib
240 && !CHECK_FLAG(dest->selected_fib->status,
241 ROUTE_ENTRY_REMOVED)
242 && dest->selected_fib->type != ZEBRA_ROUTE_TABLE)
243 match = dest->selected_fib;
244
245 /* If there is no selected route or matched route is EGP, go up
246 tree. */
247 if (!match) {
248 do {
249 rn = rn->parent;
250 } while (rn && rn->info == NULL);
251 if (rn)
252 route_lock_node(rn);
253
254 continue;
255 }
256
257 if (match->type == ZEBRA_ROUTE_CONNECT) {
258 /* Directly point connected route. */
259 newhop = match->ng.nexthop;
260 if (newhop) {
261 if (nexthop->type == NEXTHOP_TYPE_IPV4
262 || nexthop->type == NEXTHOP_TYPE_IPV6)
263 nexthop->ifindex = newhop->ifindex;
264 }
265 return 1;
266 } else if (CHECK_FLAG(re->flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
267 resolved = 0;
268 for (ALL_NEXTHOPS(match->ng, newhop)) {
269 if (!CHECK_FLAG(match->status,
270 ROUTE_ENTRY_INSTALLED))
271 continue;
272 if (CHECK_FLAG(newhop->flags,
273 NEXTHOP_FLAG_RECURSIVE))
274 continue;
275
276 SET_FLAG(nexthop->flags,
277 NEXTHOP_FLAG_RECURSIVE);
ad28e79a
SW
278 nexthop_set_resolved(afi, newhop, nexthop);
279 resolved = 1;
280 }
281 if (resolved)
282 re->nexthop_mtu = match->mtu;
283 if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED)
284 zlog_debug("\t%s: Recursion failed to find",
285 __PRETTY_FUNCTION__);
286 return resolved;
287 } else if (re->type == ZEBRA_ROUTE_STATIC) {
288 resolved = 0;
289 for (ALL_NEXTHOPS(match->ng, newhop)) {
290 if (!CHECK_FLAG(match->status,
291 ROUTE_ENTRY_INSTALLED))
292 continue;
293 if (CHECK_FLAG(newhop->flags,
294 NEXTHOP_FLAG_RECURSIVE))
295 continue;
296
297 SET_FLAG(nexthop->flags,
298 NEXTHOP_FLAG_RECURSIVE);
299 nexthop_set_resolved(afi, newhop, nexthop);
300 resolved = 1;
301 }
302 if (resolved)
303 re->nexthop_mtu = match->mtu;
304
305 if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED)
306 zlog_debug(
307 "\t%s: Static route unable to resolve",
308 __PRETTY_FUNCTION__);
309 return resolved;
310 } else {
311 if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
312 zlog_debug(
313 "\t%s: Route Type %s has not turned on recursion",
314 __PRETTY_FUNCTION__,
315 zebra_route_string(re->type));
316 if (re->type == ZEBRA_ROUTE_BGP
317 && !CHECK_FLAG(re->flags, ZEBRA_FLAG_IBGP))
318 zlog_debug(
319 "\tEBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
320 }
321 return 0;
322 }
323 }
324 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
325 zlog_debug("\t%s: Nexthop did not lookup in table",
326 __PRETTY_FUNCTION__);
327 return 0;
328}
329
330/* This function verifies reachability of one given nexthop, which can be
331 * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored
332 * in nexthop->flags field. The nexthop->ifindex will be updated
333 * appropriately as well. An existing route map can turn
334 * (otherwise active) nexthop into inactive, but not vice versa.
335 *
336 * The return value is the final value of 'ACTIVE' flag.
337 */
338static unsigned nexthop_active_check(struct route_node *rn,
339 struct route_entry *re,
340 struct nexthop *nexthop)
341{
342 struct interface *ifp;
b68885f9 343 route_map_result_t ret = RMAP_PERMITMATCH;
ad28e79a
SW
344 int family;
345 char buf[SRCDEST2STR_BUFFER];
346 const struct prefix *p, *src_p;
347 struct zebra_vrf *zvrf;
348
349 srcdest_rnode_prefixes(rn, &p, &src_p);
350
351 if (rn->p.family == AF_INET)
352 family = AFI_IP;
353 else if (rn->p.family == AF_INET6)
354 family = AFI_IP6;
355 else
356 family = 0;
357 switch (nexthop->type) {
358 case NEXTHOP_TYPE_IFINDEX:
359 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
360 if (ifp && if_is_operative(ifp))
361 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
362 else
363 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
364 break;
365 case NEXTHOP_TYPE_IPV4:
366 case NEXTHOP_TYPE_IPV4_IFINDEX:
367 family = AFI_IP;
368 if (nexthop_active(AFI_IP, re, nexthop, rn))
369 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
370 else
371 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
372 break;
373 case NEXTHOP_TYPE_IPV6:
374 family = AFI_IP6;
375 if (nexthop_active(AFI_IP6, re, nexthop, rn))
376 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
377 else
378 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
379 break;
380 case NEXTHOP_TYPE_IPV6_IFINDEX:
381 /* RFC 5549, v4 prefix with v6 NH */
382 if (rn->p.family != AF_INET)
383 family = AFI_IP6;
384 if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) {
385 ifp = if_lookup_by_index(nexthop->ifindex,
386 nexthop->vrf_id);
387 if (ifp && if_is_operative(ifp))
388 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
389 else
390 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
391 } else {
392 if (nexthop_active(AFI_IP6, re, nexthop, rn))
393 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
394 else
395 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
396 }
397 break;
398 case NEXTHOP_TYPE_BLACKHOLE:
399 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
400 break;
401 default:
402 break;
403 }
404 if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
405 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
406 zlog_debug("\t%s: Unable to find a active nexthop",
407 __PRETTY_FUNCTION__);
408 return 0;
409 }
410
411 /* XXX: What exactly do those checks do? Do we support
412 * e.g. IPv4 routes with IPv6 nexthops or vice versa?
413 */
414 if (RIB_SYSTEM_ROUTE(re) || (family == AFI_IP && p->family != AF_INET)
415 || (family == AFI_IP6 && p->family != AF_INET6))
416 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
417
418 /* The original code didn't determine the family correctly
419 * e.g. for NEXTHOP_TYPE_IFINDEX. Retrieve the correct afi
420 * from the rib_table_info in those cases.
421 * Possibly it may be better to use only the rib_table_info
422 * in every case.
423 */
424 if (!family) {
425 rib_table_info_t *info;
426
427 info = srcdest_rnode_table_info(rn);
428 family = info->afi;
429 }
430
431 memset(&nexthop->rmap_src.ipv6, 0, sizeof(union g_addr));
432
433 zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
434 if (!zvrf) {
435 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
436 zlog_debug("\t%s: zvrf is NULL", __PRETTY_FUNCTION__);
437 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
438 }
439
440 /* It'll get set if required inside */
441 ret = zebra_route_map_check(family, re->type, re->instance, p, nexthop,
442 zvrf, re->tag);
443 if (ret == RMAP_DENYMATCH) {
444 if (IS_ZEBRA_DEBUG_RIB) {
445 srcdest_rnode2str(rn, buf, sizeof(buf));
446 zlog_debug(
447 "%u:%s: Filtering out with NH out %s due to route map",
448 re->vrf_id, buf,
449 ifindex2ifname(nexthop->ifindex,
450 nexthop->vrf_id));
451 }
452 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
453 }
454 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
455}
456
457/*
458 * Iterate over all nexthops of the given RIB entry and refresh their
459 * ACTIVE flag. re->nexthop_active_num is updated accordingly. If any
460 * nexthop is found to toggle the ACTIVE flag, the whole re structure
461 * is flagged with ROUTE_ENTRY_CHANGED.
462 *
463 * Return value is the new number of active nexthops.
464 */
465int nexthop_active_update(struct route_node *rn, struct route_entry *re)
466{
467 struct nexthop *nexthop;
468 union g_addr prev_src;
469 unsigned int prev_active, new_active;
470 ifindex_t prev_index;
471
472 re->nexthop_active_num = 0;
473 UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
474
475 for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) {
476 /* No protocol daemon provides src and so we're skipping
477 * tracking it */
478 prev_src = nexthop->rmap_src;
479 prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
480 prev_index = nexthop->ifindex;
481 /*
482 * We need to respect the multipath_num here
483 * as that what we should be able to install from
484 * a multipath perpsective should not be a data plane
485 * decision point.
486 */
487 new_active = nexthop_active_check(rn, re, nexthop);
488 if (new_active
489 && re->nexthop_active_num >= zrouter.multipath_num) {
490 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
491 new_active = 0;
492 }
493 if (new_active)
494 re->nexthop_active_num++;
495 /* Don't allow src setting on IPv6 addr for now */
496 if (prev_active != new_active || prev_index != nexthop->ifindex
497 || ((nexthop->type >= NEXTHOP_TYPE_IFINDEX
498 && nexthop->type < NEXTHOP_TYPE_IPV6)
499 && prev_src.ipv4.s_addr
500 != nexthop->rmap_src.ipv4.s_addr)
501 || ((nexthop->type >= NEXTHOP_TYPE_IPV6
502 && nexthop->type < NEXTHOP_TYPE_BLACKHOLE)
503 && !(IPV6_ADDR_SAME(&prev_src.ipv6,
504 &nexthop->rmap_src.ipv6)))
42fc558e 505 || CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED))
ad28e79a 506 SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
ad28e79a
SW
507 }
508
509 return re->nexthop_active_num;
510}
511