]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_rib.c
Merge pull request #4295 from donaldsharp/topotest_if
[mirror_frr.git] / zebra / zebra_rib.c
1 /* Routing Information Base.
2 * Copyright (C) 1997, 98, 99, 2001 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "command.h"
24 #include "if.h"
25 #include "linklist.h"
26 #include "log.h"
27 #include "memory.h"
28 #include "mpls.h"
29 #include "nexthop.h"
30 #include "prefix.h"
31 #include "prefix.h"
32 #include "routemap.h"
33 #include "sockunion.h"
34 #include "srcdest_table.h"
35 #include "table.h"
36 #include "thread.h"
37 #include "vrf.h"
38 #include "workqueue.h"
39
40 #include "zebra/zebra_router.h"
41 #include "zebra/connected.h"
42 #include "zebra/debug.h"
43 #include "zebra/interface.h"
44 #include "zebra/redistribute.h"
45 #include "zebra/rib.h"
46 #include "zebra/rt.h"
47 #include "zebra/zapi_msg.h"
48 #include "zebra/zebra_errors.h"
49 #include "zebra/zebra_memory.h"
50 #include "zebra/zebra_ns.h"
51 #include "zebra/zebra_rnh.h"
52 #include "zebra/zebra_routemap.h"
53 #include "zebra/zebra_vrf.h"
54 #include "zebra/zebra_vxlan.h"
55 #include "zebra/zapi_msg.h"
56 #include "zebra/zebra_dplane.h"
57
58 /*
59 * Event, list, and mutex for delivery of dataplane results.
60 */
61 static pthread_mutex_t dplane_mutex;
62 static struct thread *t_dplane;
63 static struct dplane_ctx_q rib_dplane_q;
64
/*
 * Hook "rib_update": invoked through hook_call() with the affected route
 * node and a short human-readable reason string, e.g. right before a route
 * is handed to the dataplane for install or uninstall.
 */
65 DEFINE_HOOK(rib_update, (struct route_node * rn, const char *reason),
66 (rn, reason))
67
68 /* Should we allow non Quagga processes to delete our routes?
 * Defined in another translation unit; only declared here. */
69 extern int allow_delete;
70
71 /*
 * Per-route-type parameters, indexed by ZEBRA_ROUTE_* value.
 *
 *   key        - the route type itself (same value as the index)
 *   distance   - default administrative distance, see route_distance()
 *   meta_q_map - presumably the index of the meta-queue sub-queue used for
 *                this route type; its consumer is not in this part of the
 *                file -- TODO confirm
 */
72 static const struct {
73 int key;
74 uint8_t distance;
75 uint8_t meta_q_map;
76 } route_info[ZEBRA_ROUTE_MAX] = {
77 [ZEBRA_ROUTE_SYSTEM] = {ZEBRA_ROUTE_SYSTEM, 0, 4},
78 [ZEBRA_ROUTE_KERNEL] = {ZEBRA_ROUTE_KERNEL, 0, 0},
79 [ZEBRA_ROUTE_CONNECT] = {ZEBRA_ROUTE_CONNECT, 0, 0},
80 [ZEBRA_ROUTE_STATIC] = {ZEBRA_ROUTE_STATIC, 1, 1},
81 [ZEBRA_ROUTE_RIP] = {ZEBRA_ROUTE_RIP, 120, 2},
82 [ZEBRA_ROUTE_RIPNG] = {ZEBRA_ROUTE_RIPNG, 120, 2},
83 [ZEBRA_ROUTE_OSPF] = {ZEBRA_ROUTE_OSPF, 110, 2},
84 [ZEBRA_ROUTE_OSPF6] = {ZEBRA_ROUTE_OSPF6, 110, 2},
85 [ZEBRA_ROUTE_ISIS] = {ZEBRA_ROUTE_ISIS, 115, 2},
86 [ZEBRA_ROUTE_BGP] = {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. */, 3},
87 [ZEBRA_ROUTE_PIM] = {ZEBRA_ROUTE_PIM, 255, 4},
88 [ZEBRA_ROUTE_EIGRP] = {ZEBRA_ROUTE_EIGRP, 90, 2},
89 [ZEBRA_ROUTE_NHRP] = {ZEBRA_ROUTE_NHRP, 10, 2},
90 [ZEBRA_ROUTE_HSLS] = {ZEBRA_ROUTE_HSLS, 255, 4},
91 [ZEBRA_ROUTE_OLSR] = {ZEBRA_ROUTE_OLSR, 255, 4},
92 [ZEBRA_ROUTE_TABLE] = {ZEBRA_ROUTE_TABLE, 150, 1},
93 [ZEBRA_ROUTE_LDP] = {ZEBRA_ROUTE_LDP, 150, 4},
94 [ZEBRA_ROUTE_VNC] = {ZEBRA_ROUTE_VNC, 20, 3},
95 [ZEBRA_ROUTE_VNC_DIRECT] = {ZEBRA_ROUTE_VNC_DIRECT, 20, 3},
96 [ZEBRA_ROUTE_VNC_DIRECT_RH] = {ZEBRA_ROUTE_VNC_DIRECT_RH, 20, 3},
97 [ZEBRA_ROUTE_BGP_DIRECT] = {ZEBRA_ROUTE_BGP_DIRECT, 20, 3},
98 [ZEBRA_ROUTE_BGP_DIRECT_EXT] = {ZEBRA_ROUTE_BGP_DIRECT_EXT, 20, 3},
99 [ZEBRA_ROUTE_BABEL] = {ZEBRA_ROUTE_BABEL, 100, 2},
100 [ZEBRA_ROUTE_SHARP] = {ZEBRA_ROUTE_SHARP, 150, 4},
101 [ZEBRA_ROUTE_PBR] = {ZEBRA_ROUTE_PBR, 200, 4},
102 [ZEBRA_ROUTE_BFD] = {ZEBRA_ROUTE_BFD, 255, 4},
103 [ZEBRA_ROUTE_OPENFABRIC] = {ZEBRA_ROUTE_OPENFABRIC, 115, 2},
104 /* Any new route type added to zebra should be mirrored here. */
105
106 /* Types outside this table: route_distance() answers 150. */
107 };
108
109 /* RPF lookup behaviour: selects how rib_match_ipv4_multicast() combines
 * the multicast and unicast tables. Changed via multicast_mode_ipv4_set(). */
110 static enum multicast_mode ipv4_multicast_mode = MCAST_NO_CONFIG;
111
112
113 static void __attribute__((format(printf, 5, 6)))
114 _rnode_zlog(const char *_func, vrf_id_t vrf_id, struct route_node *rn,
115 int priority, const char *msgfmt, ...)
116 {
117 char buf[SRCDEST2STR_BUFFER + sizeof(" (MRIB)")];
118 char msgbuf[512];
119 va_list ap;
120
121 va_start(ap, msgfmt);
122 vsnprintf(msgbuf, sizeof(msgbuf), msgfmt, ap);
123 va_end(ap);
124
125 if (rn) {
126 rib_table_info_t *info = srcdest_rnode_table_info(rn);
127 srcdest_rnode2str(rn, buf, sizeof(buf));
128
129 if (info->safi == SAFI_MULTICAST)
130 strcat(buf, " (MRIB)");
131 } else {
132 snprintf(buf, sizeof(buf), "{(route_node *) NULL}");
133 }
134
135 zlog(priority, "%s: %d:%s: %s", _func, vrf_id, buf, msgbuf);
136 }
137
/*
 * Convenience wrappers around _rnode_zlog() that supply the calling
 * function's name and the log priority.
 *
 * rnode_debug() takes the VRF id as an explicit argument.
 * rnode_info() does not: its expansion refers to an identifier named
 * `vrf_id`, so it only compiles where such a variable is visible at the
 * call site.
 */
138 #define rnode_debug(node, vrf_id, ...) \
139 _rnode_zlog(__func__, vrf_id, node, LOG_DEBUG, __VA_ARGS__)
140 #define rnode_info(node, ...) \
141 _rnode_zlog(__func__, vrf_id, node, LOG_INFO, __VA_ARGS__)
142
143 uint8_t route_distance(int type)
144 {
145 uint8_t distance;
146
147 if ((unsigned)type >= array_size(route_info))
148 distance = 150;
149 else
150 distance = route_info[type].distance;
151
152 return distance;
153 }
154
/*
 * Tell whether a kernel routing table id may be used by zebra.
 *
 * On Linux builds the UNSPEC, LOCAL and COMPAT tables are rejected;
 * every other id (and every id on other platforms) is accepted.
 *
 * Returns 1 when the table id is acceptable, 0 otherwise.
 */
int is_zebra_valid_kernel_table(uint32_t table_id)
{
#ifdef linux
	switch (table_id) {
	case RT_TABLE_UNSPEC:
	case RT_TABLE_LOCAL:
	case RT_TABLE_COMPAT:
		return 0;
	default:
		break;
	}
#endif

	return 1;
}
165
166 int is_zebra_main_routing_table(uint32_t table_id)
167 {
168 if (table_id == RT_TABLE_MAIN)
169 return 1;
170 return 0;
171 }
172
173 int zebra_check_addr(const struct prefix *p)
174 {
175 if (p->family == AF_INET) {
176 uint32_t addr;
177
178 addr = p->u.prefix4.s_addr;
179 addr = ntohl(addr);
180
181 if (IPV4_NET127(addr) || IN_CLASSD(addr)
182 || IPV4_LINKLOCAL(addr))
183 return 0;
184 }
185 if (p->family == AF_INET6) {
186 if (IN6_IS_ADDR_LOOPBACK(&p->u.prefix6))
187 return 0;
188 if (IN6_IS_ADDR_LINKLOCAL(&p->u.prefix6))
189 return 0;
190 }
191 return 1;
192 }
193
194 /* Add nexthop to the end of a rib node's nexthop list */
195 void route_entry_nexthop_add(struct route_entry *re, struct nexthop *nexthop)
196 {
197 nexthop_add(&re->ng.nexthop, nexthop);
198 re->nexthop_num++;
199 }
200
201
202 /**
203 * copy_nexthop - copy a nexthop to the rib structure.
204 */
205 void route_entry_copy_nexthops(struct route_entry *re, struct nexthop *nh)
206 {
207 assert(!re->ng.nexthop);
208 copy_nexthops(&re->ng.nexthop, nh, NULL);
209 for (struct nexthop *nexthop = nh; nexthop; nexthop = nexthop->next)
210 re->nexthop_num++;
211 }
212
213 /* Delete specified nexthop from the list. */
214 void route_entry_nexthop_delete(struct route_entry *re, struct nexthop *nexthop)
215 {
216 if (nexthop->next)
217 nexthop->next->prev = nexthop->prev;
218 if (nexthop->prev)
219 nexthop->prev->next = nexthop->next;
220 else
221 re->ng.nexthop = nexthop->next;
222 re->nexthop_num--;
223 }
224
225
226 struct nexthop *route_entry_nexthop_ifindex_add(struct route_entry *re,
227 ifindex_t ifindex,
228 vrf_id_t nh_vrf_id)
229 {
230 struct nexthop *nexthop;
231
232 nexthop = nexthop_new();
233 nexthop->type = NEXTHOP_TYPE_IFINDEX;
234 nexthop->ifindex = ifindex;
235 nexthop->vrf_id = nh_vrf_id;
236
237 route_entry_nexthop_add(re, nexthop);
238
239 return nexthop;
240 }
241
242 struct nexthop *route_entry_nexthop_ipv4_add(struct route_entry *re,
243 struct in_addr *ipv4,
244 struct in_addr *src,
245 vrf_id_t nh_vrf_id)
246 {
247 struct nexthop *nexthop;
248
249 nexthop = nexthop_new();
250 nexthop->type = NEXTHOP_TYPE_IPV4;
251 nexthop->vrf_id = nh_vrf_id;
252 nexthop->gate.ipv4 = *ipv4;
253 if (src)
254 nexthop->src.ipv4 = *src;
255
256 route_entry_nexthop_add(re, nexthop);
257
258 return nexthop;
259 }
260
261 struct nexthop *route_entry_nexthop_ipv4_ifindex_add(struct route_entry *re,
262 struct in_addr *ipv4,
263 struct in_addr *src,
264 ifindex_t ifindex,
265 vrf_id_t nh_vrf_id)
266 {
267 struct nexthop *nexthop;
268 struct interface *ifp;
269
270 nexthop = nexthop_new();
271 nexthop->vrf_id = nh_vrf_id;
272 nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
273 nexthop->gate.ipv4 = *ipv4;
274 if (src)
275 nexthop->src.ipv4 = *src;
276 nexthop->ifindex = ifindex;
277 ifp = if_lookup_by_index(nexthop->ifindex, nh_vrf_id);
278 /*Pending: need to think if null ifp here is ok during bootup?
279 There was a crash because ifp here was coming to be NULL */
280 if (ifp)
281 if (connected_is_unnumbered(ifp))
282 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK);
283
284 route_entry_nexthop_add(re, nexthop);
285
286 return nexthop;
287 }
288
289 struct nexthop *route_entry_nexthop_ipv6_add(struct route_entry *re,
290 struct in6_addr *ipv6,
291 vrf_id_t nh_vrf_id)
292 {
293 struct nexthop *nexthop;
294
295 nexthop = nexthop_new();
296 nexthop->vrf_id = nh_vrf_id;
297 nexthop->type = NEXTHOP_TYPE_IPV6;
298 nexthop->gate.ipv6 = *ipv6;
299
300 route_entry_nexthop_add(re, nexthop);
301
302 return nexthop;
303 }
304
305 struct nexthop *route_entry_nexthop_ipv6_ifindex_add(struct route_entry *re,
306 struct in6_addr *ipv6,
307 ifindex_t ifindex,
308 vrf_id_t nh_vrf_id)
309 {
310 struct nexthop *nexthop;
311
312 nexthop = nexthop_new();
313 nexthop->vrf_id = nh_vrf_id;
314 nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
315 nexthop->gate.ipv6 = *ipv6;
316 nexthop->ifindex = ifindex;
317
318 route_entry_nexthop_add(re, nexthop);
319
320 return nexthop;
321 }
322
323 struct nexthop *route_entry_nexthop_blackhole_add(struct route_entry *re,
324 enum blackhole_type bh_type)
325 {
326 struct nexthop *nexthop;
327
328 nexthop = nexthop_new();
329 nexthop->vrf_id = VRF_DEFAULT;
330 nexthop->type = NEXTHOP_TYPE_BLACKHOLE;
331 nexthop->bh_type = bh_type;
332
333 route_entry_nexthop_add(re, nexthop);
334
335 return nexthop;
336 }
337
338 static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop,
339 struct nexthop *nexthop)
340 {
341 struct nexthop *resolved_hop;
342
343 resolved_hop = nexthop_new();
344 SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);
345
346 resolved_hop->vrf_id = nexthop->vrf_id;
347 switch (newhop->type) {
348 case NEXTHOP_TYPE_IPV4:
349 case NEXTHOP_TYPE_IPV4_IFINDEX:
350 /* If the resolving route specifies a gateway, use it */
351 resolved_hop->type = newhop->type;
352 resolved_hop->gate.ipv4 = newhop->gate.ipv4;
353
354 if (newhop->ifindex) {
355 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
356 resolved_hop->ifindex = newhop->ifindex;
357 }
358 break;
359 case NEXTHOP_TYPE_IPV6:
360 case NEXTHOP_TYPE_IPV6_IFINDEX:
361 resolved_hop->type = newhop->type;
362 resolved_hop->gate.ipv6 = newhop->gate.ipv6;
363
364 if (newhop->ifindex) {
365 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
366 resolved_hop->ifindex = newhop->ifindex;
367 }
368 break;
369 case NEXTHOP_TYPE_IFINDEX:
370 /* If the resolving route is an interface route,
371 * it means the gateway we are looking up is connected
372 * to that interface. (The actual network is _not_ onlink).
373 * Therefore, the resolved route should have the original
374 * gateway as nexthop as it is directly connected.
375 *
376 * On Linux, we have to set the onlink netlink flag because
377 * otherwise, the kernel won't accept the route.
378 */
379 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
380 if (afi == AFI_IP) {
381 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
382 resolved_hop->gate.ipv4 = nexthop->gate.ipv4;
383 } else if (afi == AFI_IP6) {
384 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
385 resolved_hop->gate.ipv6 = nexthop->gate.ipv6;
386 }
387 resolved_hop->ifindex = newhop->ifindex;
388 break;
389 case NEXTHOP_TYPE_BLACKHOLE:
390 resolved_hop->type = NEXTHOP_TYPE_BLACKHOLE;
391 resolved_hop->bh_type = nexthop->bh_type;
392 break;
393 }
394
395 if (newhop->flags & NEXTHOP_FLAG_ONLINK)
396 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
397
398 /* Copy labels of the resolved route */
399 if (newhop->nh_label)
400 nexthop_add_labels(resolved_hop, newhop->nh_label_type,
401 newhop->nh_label->num_labels,
402 &newhop->nh_label->label[0]);
403
404 resolved_hop->rparent = nexthop;
405 nexthop_add(&nexthop->resolved, resolved_hop);
406 }
407
408 /*
409 * Given a nexthop we need to properly recursively resolve
410 * the route. As such, do a table lookup to find and match
411 * if at all possible. Set the nexthop->ifindex as appropriate
412 */
413 static int nexthop_active(afi_t afi, struct route_entry *re,
414 struct nexthop *nexthop,
415 struct route_node *top)
416 {
417 struct prefix p;
418 struct route_table *table;
419 struct route_node *rn;
420 struct route_entry *match = NULL;
421 int resolved;
422 struct nexthop *newhop;
423 struct interface *ifp;
424 rib_dest_t *dest;
425
426 if ((nexthop->type == NEXTHOP_TYPE_IPV4)
427 || nexthop->type == NEXTHOP_TYPE_IPV6)
428 nexthop->ifindex = 0;
429
430 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
431 nexthops_free(nexthop->resolved);
432 nexthop->resolved = NULL;
433 re->nexthop_mtu = 0;
434
435 /*
436 * If the kernel has sent us a route, then
437 * by golly gee whiz it's a good route.
438 */
439 if (re->type == ZEBRA_ROUTE_KERNEL ||
440 re->type == ZEBRA_ROUTE_SYSTEM)
441 return 1;
442
443 /*
444 * Check to see if we should trust the passed in information
445 * for UNNUMBERED interfaces as that we won't find the GW
446 * address in the routing table.
447 * This check should suffice to handle IPv4 or IPv6 routes
448 * sourced from EVPN routes which are installed with the
449 * next hop as the remote VTEP IP.
450 */
451 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) {
452 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
453 if (!ifp) {
454 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
455 zlog_debug(
456 "\t%s: Onlink and interface: %u[%u] does not exist",
457 __PRETTY_FUNCTION__, nexthop->ifindex,
458 nexthop->vrf_id);
459 return 0;
460 }
461 if (connected_is_unnumbered(ifp)) {
462 if (if_is_operative(ifp))
463 return 1;
464 else {
465 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
466 zlog_debug(
467 "\t%s: Onlink and interface %s is not operative",
468 __PRETTY_FUNCTION__, ifp->name);
469 return 0;
470 }
471 }
472 if (!if_is_operative(ifp)) {
473 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
474 zlog_debug(
475 "\t%s: Interface %s is not unnumbered",
476 __PRETTY_FUNCTION__, ifp->name);
477 return 0;
478 }
479 }
480
481 /* Make lookup prefix. */
482 memset(&p, 0, sizeof(struct prefix));
483 switch (afi) {
484 case AFI_IP:
485 p.family = AF_INET;
486 p.prefixlen = IPV4_MAX_PREFIXLEN;
487 p.u.prefix4 = nexthop->gate.ipv4;
488 break;
489 case AFI_IP6:
490 p.family = AF_INET6;
491 p.prefixlen = IPV6_MAX_PREFIXLEN;
492 p.u.prefix6 = nexthop->gate.ipv6;
493 break;
494 default:
495 assert(afi != AFI_IP && afi != AFI_IP6);
496 break;
497 }
498 /* Lookup table. */
499 table = zebra_vrf_table(afi, SAFI_UNICAST, nexthop->vrf_id);
500 if (!table) {
501 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
502 zlog_debug("\t%s: Table not found",
503 __PRETTY_FUNCTION__);
504 return 0;
505 }
506
507 rn = route_node_match(table, (struct prefix *)&p);
508 while (rn) {
509 route_unlock_node(rn);
510
511 /* Lookup should halt if we've matched against ourselves ('top',
512 * if specified) - i.e., we cannot have a nexthop NH1 is
513 * resolved by a route NH1. The exception is if the route is a
514 * host route.
515 */
516 if (top && rn == top)
517 if (((afi == AFI_IP) && (rn->p.prefixlen != 32))
518 || ((afi == AFI_IP6) && (rn->p.prefixlen != 128))) {
519 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
520 zlog_debug(
521 "\t%s: Matched against ourself and prefix length is not max bit length",
522 __PRETTY_FUNCTION__);
523 return 0;
524 }
525
526 /* Pick up selected route. */
527 /* However, do not resolve over default route unless explicitly
528 * allowed. */
529 if (is_default_prefix(&rn->p)
530 && !rnh_resolve_via_default(p.family)) {
531 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
532 zlog_debug(
533 "\t:%s: Resolved against default route",
534 __PRETTY_FUNCTION__);
535 return 0;
536 }
537
538 dest = rib_dest_from_rnode(rn);
539 if (dest && dest->selected_fib
540 && !CHECK_FLAG(dest->selected_fib->status,
541 ROUTE_ENTRY_REMOVED)
542 && dest->selected_fib->type != ZEBRA_ROUTE_TABLE)
543 match = dest->selected_fib;
544
545 /* If there is no selected route or matched route is EGP, go up
546 tree. */
547 if (!match) {
548 do {
549 rn = rn->parent;
550 } while (rn && rn->info == NULL);
551 if (rn)
552 route_lock_node(rn);
553
554 continue;
555 }
556
557 if (match->type == ZEBRA_ROUTE_CONNECT) {
558 /* Directly point connected route. */
559 newhop = match->ng.nexthop;
560 if (newhop) {
561 if (nexthop->type == NEXTHOP_TYPE_IPV4
562 || nexthop->type == NEXTHOP_TYPE_IPV6)
563 nexthop->ifindex = newhop->ifindex;
564 }
565 return 1;
566 } else if (CHECK_FLAG(re->flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
567 resolved = 0;
568 for (ALL_NEXTHOPS(match->ng, newhop)) {
569 if (!CHECK_FLAG(match->status,
570 ROUTE_ENTRY_INSTALLED))
571 continue;
572 if (CHECK_FLAG(newhop->flags,
573 NEXTHOP_FLAG_RECURSIVE))
574 continue;
575
576 SET_FLAG(nexthop->flags,
577 NEXTHOP_FLAG_RECURSIVE);
578 SET_FLAG(re->status,
579 ROUTE_ENTRY_NEXTHOPS_CHANGED);
580 nexthop_set_resolved(afi, newhop, nexthop);
581 resolved = 1;
582 }
583 if (resolved)
584 re->nexthop_mtu = match->mtu;
585 if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED)
586 zlog_debug("\t%s: Recursion failed to find",
587 __PRETTY_FUNCTION__);
588 return resolved;
589 } else if (re->type == ZEBRA_ROUTE_STATIC) {
590 resolved = 0;
591 for (ALL_NEXTHOPS(match->ng, newhop)) {
592 if (!CHECK_FLAG(match->status,
593 ROUTE_ENTRY_INSTALLED))
594 continue;
595 if (CHECK_FLAG(newhop->flags,
596 NEXTHOP_FLAG_RECURSIVE))
597 continue;
598
599 SET_FLAG(nexthop->flags,
600 NEXTHOP_FLAG_RECURSIVE);
601 nexthop_set_resolved(afi, newhop, nexthop);
602 resolved = 1;
603 }
604 if (resolved)
605 re->nexthop_mtu = match->mtu;
606
607 if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED)
608 zlog_debug(
609 "\t%s: Static route unable to resolve",
610 __PRETTY_FUNCTION__);
611 return resolved;
612 } else {
613 if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
614 zlog_debug("\t%s: Route Type %s has not turned on recursion",
615 __PRETTY_FUNCTION__,
616 zebra_route_string(re->type));
617 if (re->type == ZEBRA_ROUTE_BGP &&
618 !CHECK_FLAG(re->flags, ZEBRA_FLAG_IBGP))
619 zlog_debug("\tEBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
620 }
621 return 0;
622 }
623 }
624 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
625 zlog_debug("\t%s: Nexthop did not lookup in table",
626 __PRETTY_FUNCTION__);
627 return 0;
628 }
629
630 struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id,
631 union g_addr *addr, struct route_node **rn_out)
632 {
633 struct prefix p;
634 struct route_table *table;
635 struct route_node *rn;
636 struct route_entry *match = NULL;
637
638 /* Lookup table. */
639 table = zebra_vrf_table(afi, safi, vrf_id);
640 if (!table)
641 return 0;
642
643 memset(&p, 0, sizeof(struct prefix));
644 p.family = afi;
645 if (afi == AFI_IP) {
646 p.u.prefix4 = addr->ipv4;
647 p.prefixlen = IPV4_MAX_PREFIXLEN;
648 } else {
649 p.u.prefix6 = addr->ipv6;
650 p.prefixlen = IPV6_MAX_PREFIXLEN;
651 }
652
653 rn = route_node_match(table, (struct prefix *)&p);
654
655 while (rn) {
656 rib_dest_t *dest;
657
658 route_unlock_node(rn);
659
660 dest = rib_dest_from_rnode(rn);
661 if (dest && dest->selected_fib
662 && !CHECK_FLAG(dest->selected_fib->status,
663 ROUTE_ENTRY_REMOVED))
664 match = dest->selected_fib;
665
666 /* If there is no selected route or matched route is EGP, go up
667 tree. */
668 if (!match) {
669 do {
670 rn = rn->parent;
671 } while (rn && rn->info == NULL);
672 if (rn)
673 route_lock_node(rn);
674 } else {
675 if (match->type != ZEBRA_ROUTE_CONNECT) {
676 if (!CHECK_FLAG(match->status,
677 ROUTE_ENTRY_INSTALLED))
678 return NULL;
679 }
680
681 if (rn_out)
682 *rn_out = rn;
683 return match;
684 }
685 }
686 return NULL;
687 }
688
689 struct route_entry *rib_match_ipv4_multicast(vrf_id_t vrf_id,
690 struct in_addr addr,
691 struct route_node **rn_out)
692 {
693 struct route_entry *re = NULL, *mre = NULL, *ure = NULL;
694 struct route_node *m_rn = NULL, *u_rn = NULL;
695 union g_addr gaddr = {.ipv4 = addr};
696
697 switch (ipv4_multicast_mode) {
698 case MCAST_MRIB_ONLY:
699 return rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr,
700 rn_out);
701 case MCAST_URIB_ONLY:
702 return rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, rn_out);
703 case MCAST_NO_CONFIG:
704 case MCAST_MIX_MRIB_FIRST:
705 re = mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr,
706 &m_rn);
707 if (!mre)
708 re = ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id,
709 &gaddr, &u_rn);
710 break;
711 case MCAST_MIX_DISTANCE:
712 mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn);
713 ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, &u_rn);
714 if (mre && ure)
715 re = ure->distance < mre->distance ? ure : mre;
716 else if (mre)
717 re = mre;
718 else if (ure)
719 re = ure;
720 break;
721 case MCAST_MIX_PFXLEN:
722 mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn);
723 ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, &u_rn);
724 if (mre && ure)
725 re = u_rn->p.prefixlen > m_rn->p.prefixlen ? ure : mre;
726 else if (mre)
727 re = mre;
728 else if (ure)
729 re = ure;
730 break;
731 }
732
733 if (rn_out)
734 *rn_out = (re == mre) ? m_rn : u_rn;
735
736 if (IS_ZEBRA_DEBUG_RIB) {
737 char buf[BUFSIZ];
738 inet_ntop(AF_INET, &addr, buf, BUFSIZ);
739
740 zlog_debug("%s: %s: vrf: %u found %s, using %s",
741 __func__, buf, vrf_id,
742 mre ? (ure ? "MRIB+URIB" : "MRIB")
743 : ure ? "URIB" : "nothing",
744 re == ure ? "URIB" : re == mre ? "MRIB" : "none");
745 }
746 return re;
747 }
748
749 void multicast_mode_ipv4_set(enum multicast_mode mode)
750 {
751 if (IS_ZEBRA_DEBUG_RIB)
752 zlog_debug("%s: multicast lookup mode set (%d)", __func__,
753 mode);
754 ipv4_multicast_mode = mode;
755 }
756
757 enum multicast_mode multicast_mode_ipv4_get(void)
758 {
759 return ipv4_multicast_mode;
760 }
761
762 struct route_entry *rib_lookup_ipv4(struct prefix_ipv4 *p, vrf_id_t vrf_id)
763 {
764 struct route_table *table;
765 struct route_node *rn;
766 struct route_entry *match = NULL;
767 rib_dest_t *dest;
768
769 /* Lookup table. */
770 table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id);
771 if (!table)
772 return 0;
773
774 rn = route_node_lookup(table, (struct prefix *)p);
775
776 /* No route for this prefix. */
777 if (!rn)
778 return NULL;
779
780 /* Unlock node. */
781 route_unlock_node(rn);
782 dest = rib_dest_from_rnode(rn);
783
784 if (dest && dest->selected_fib
785 && !CHECK_FLAG(dest->selected_fib->status, ROUTE_ENTRY_REMOVED))
786 match = dest->selected_fib;
787
788 if (!match)
789 return NULL;
790
791 if (match->type == ZEBRA_ROUTE_CONNECT)
792 return match;
793
794 if (CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED))
795 return match;
796
797 return NULL;
798 }
799
/* True for route entries of type KERNEL or CONNECT. Note that, despite
 * the name, ZEBRA_ROUTE_SYSTEM is not part of this test. */
800 #define RIB_SYSTEM_ROUTE(R) \
801 ((R)->type == ZEBRA_ROUTE_KERNEL || (R)->type == ZEBRA_ROUTE_CONNECT)
802
/* True only for route entries of type KERNEL. */
803 #define RIB_KERNEL_ROUTE(R) \
804 ((R)->type == ZEBRA_ROUTE_KERNEL)
805
806 /* This function verifies reachability of one given nexthop, which can be
807 * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored
808 * in nexthop->flags field. The nexthop->ifindex will be updated
809 * appropriately as well. An existing route map can turn
810 * (otherwise active) nexthop into inactive, but not vice versa.
811 *
812 * The return value is the final value of 'ACTIVE' flag.
813 */
814 static unsigned nexthop_active_check(struct route_node *rn,
815 struct route_entry *re,
816 struct nexthop *nexthop)
817 {
818 struct interface *ifp;
819 route_map_result_t ret = RMAP_MATCH;
820 int family;
821 char buf[SRCDEST2STR_BUFFER];
822 const struct prefix *p, *src_p;
823 struct zebra_vrf *zvrf;
824
825 srcdest_rnode_prefixes(rn, &p, &src_p);
826
827 if (rn->p.family == AF_INET)
828 family = AFI_IP;
829 else if (rn->p.family == AF_INET6)
830 family = AFI_IP6;
831 else
832 family = 0;
833 switch (nexthop->type) {
834 case NEXTHOP_TYPE_IFINDEX:
835 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
836 if (ifp && if_is_operative(ifp))
837 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
838 else
839 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
840 break;
841 case NEXTHOP_TYPE_IPV4:
842 case NEXTHOP_TYPE_IPV4_IFINDEX:
843 family = AFI_IP;
844 if (nexthop_active(AFI_IP, re, nexthop, rn))
845 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
846 else
847 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
848 break;
849 case NEXTHOP_TYPE_IPV6:
850 family = AFI_IP6;
851 if (nexthop_active(AFI_IP6, re, nexthop, rn))
852 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
853 else
854 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
855 break;
856 case NEXTHOP_TYPE_IPV6_IFINDEX:
857 /* RFC 5549, v4 prefix with v6 NH */
858 if (rn->p.family != AF_INET)
859 family = AFI_IP6;
860 if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) {
861 ifp = if_lookup_by_index(nexthop->ifindex,
862 nexthop->vrf_id);
863 if (ifp && if_is_operative(ifp))
864 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
865 else
866 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
867 } else {
868 if (nexthop_active(AFI_IP6, re, nexthop, rn))
869 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
870 else
871 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
872 }
873 break;
874 case NEXTHOP_TYPE_BLACKHOLE:
875 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
876 break;
877 default:
878 break;
879 }
880 if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
881 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
882 zlog_debug("\t%s: Unable to find a active nexthop",
883 __PRETTY_FUNCTION__);
884 return 0;
885 }
886
887 /* XXX: What exactly do those checks do? Do we support
888 * e.g. IPv4 routes with IPv6 nexthops or vice versa?
889 */
890 if (RIB_SYSTEM_ROUTE(re) || (family == AFI_IP && p->family != AF_INET)
891 || (family == AFI_IP6 && p->family != AF_INET6))
892 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
893
894 /* The original code didn't determine the family correctly
895 * e.g. for NEXTHOP_TYPE_IFINDEX. Retrieve the correct afi
896 * from the rib_table_info in those cases.
897 * Possibly it may be better to use only the rib_table_info
898 * in every case.
899 */
900 if (!family) {
901 rib_table_info_t *info;
902
903 info = srcdest_rnode_table_info(rn);
904 family = info->afi;
905 }
906
907 memset(&nexthop->rmap_src.ipv6, 0, sizeof(union g_addr));
908
909 zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
910 if (!zvrf) {
911 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
912 zlog_debug("\t%s: zvrf is NULL", __PRETTY_FUNCTION__);
913 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
914 }
915
916 /* It'll get set if required inside */
917 ret = zebra_route_map_check(family, re->type, re->instance, p,
918 nexthop, zvrf, re->tag);
919 if (ret == RMAP_DENYMATCH) {
920 if (IS_ZEBRA_DEBUG_RIB) {
921 srcdest_rnode2str(rn, buf, sizeof(buf));
922 zlog_debug(
923 "%u:%s: Filtering out with NH out %s due to route map",
924 re->vrf_id, buf,
925 ifindex2ifname(nexthop->ifindex,
926 nexthop->vrf_id));
927 }
928 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
929 }
930 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
931 }
932
933 /*
934 * Iterate over all nexthops of the given RIB entry and refresh their
935 * ACTIVE flag. re->nexthop_active_num is updated accordingly. If any
936 * nexthop is found to toggle the ACTIVE flag, the whole re structure
937 * is flagged with ROUTE_ENTRY_CHANGED.
938 *
939 * Return value is the new number of active nexthops.
940 */
941 static int nexthop_active_update(struct route_node *rn, struct route_entry *re)
942 {
943 struct nexthop *nexthop;
944 union g_addr prev_src;
945 unsigned int prev_active, new_active;
946 ifindex_t prev_index;
947
948 re->nexthop_active_num = 0;
949 UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
950
951 for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) {
952 /* No protocol daemon provides src and so we're skipping
953 * tracking it */
954 prev_src = nexthop->rmap_src;
955 prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
956 prev_index = nexthop->ifindex;
957 /*
958 * We need to respect the multipath_num here
959 * as that what we should be able to install from
960 * a multipath perpsective should not be a data plane
961 * decision point.
962 */
963 new_active = nexthop_active_check(rn, re, nexthop);
964 if (new_active && re->nexthop_active_num >= multipath_num) {
965 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
966 new_active = 0;
967 }
968 if (new_active)
969 re->nexthop_active_num++;
970 /* Don't allow src setting on IPv6 addr for now */
971 if (prev_active != new_active || prev_index != nexthop->ifindex
972 || ((nexthop->type >= NEXTHOP_TYPE_IFINDEX
973 && nexthop->type < NEXTHOP_TYPE_IPV6)
974 && prev_src.ipv4.s_addr
975 != nexthop->rmap_src.ipv4.s_addr)
976 || ((nexthop->type >= NEXTHOP_TYPE_IPV6
977 && nexthop->type < NEXTHOP_TYPE_BLACKHOLE)
978 && !(IPV6_ADDR_SAME(&prev_src.ipv6,
979 &nexthop->rmap_src.ipv6)))
980 || CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED)) {
981 SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
982 SET_FLAG(re->status, ROUTE_ENTRY_NEXTHOPS_CHANGED);
983 }
984 }
985
986 return re->nexthop_active_num;
987 }
988
989 /*
990 * Is this RIB labeled-unicast? It must be of type BGP and all paths
991 * (nexthops) must have a label.
992 */
993 int zebra_rib_labeled_unicast(struct route_entry *re)
994 {
995 struct nexthop *nexthop = NULL;
996
997 if (re->type != ZEBRA_ROUTE_BGP)
998 return 0;
999
1000 for (ALL_NEXTHOPS(re->ng, nexthop))
1001 if (!nexthop->nh_label || !nexthop->nh_label->num_labels)
1002 return 0;
1003
1004 return 1;
1005 }
1006
1007 /* Update flag indicates whether this is a "replace" or not. Currently, this
1008 * is only used for IPv4.
1009 */
1010 void rib_install_kernel(struct route_node *rn, struct route_entry *re,
1011 struct route_entry *old)
1012 {
1013 struct nexthop *nexthop;
1014 rib_table_info_t *info = srcdest_rnode_table_info(rn);
1015 struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
1016 const struct prefix *p, *src_p;
1017 enum zebra_dplane_result ret;
1018
1019 rib_dest_t *dest = rib_dest_from_rnode(rn);
1020
1021 srcdest_rnode_prefixes(rn, &p, &src_p);
1022
1023 if (info->safi != SAFI_UNICAST) {
1024 for (ALL_NEXTHOPS(re->ng, nexthop))
1025 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
1026 return;
1027 } else {
1028 struct nexthop *prev;
1029
1030 for (ALL_NEXTHOPS(re->ng, nexthop)) {
1031 UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_DUPLICATE);
1032 for (ALL_NEXTHOPS(re->ng, prev)) {
1033 if (prev == nexthop)
1034 break;
1035 if (nexthop_same_firsthop(nexthop, prev)) {
1036 SET_FLAG(nexthop->flags,
1037 NEXTHOP_FLAG_DUPLICATE);
1038 break;
1039 }
1040 }
1041 }
1042 }
1043
1044 /*
1045 * If this is a replace to a new RE let the originator of the RE
1046 * know that they've lost
1047 */
1048 if (old && (old != re) && (old->type != re->type))
1049 zsend_route_notify_owner(old, p, ZAPI_ROUTE_BETTER_ADMIN_WON);
1050
1051 /* Update fib selection */
1052 dest->selected_fib = re;
1053
1054 /*
1055 * Make sure we update the FPM any time we send new information to
1056 * the kernel.
1057 */
1058 hook_call(rib_update, rn, "installing in kernel");
1059
1060 /* Send add or update */
1061 if (old)
1062 ret = dplane_route_update(rn, re, old);
1063 else
1064 ret = dplane_route_add(rn, re);
1065
1066 switch (ret) {
1067 case ZEBRA_DPLANE_REQUEST_QUEUED:
1068 SET_FLAG(re->status, ROUTE_ENTRY_QUEUED);
1069 if (old)
1070 SET_FLAG(old->status, ROUTE_ENTRY_QUEUED);
1071 if (zvrf)
1072 zvrf->installs_queued++;
1073 break;
1074 case ZEBRA_DPLANE_REQUEST_FAILURE:
1075 {
1076 char str[SRCDEST2STR_BUFFER];
1077
1078 srcdest_rnode2str(rn, str, sizeof(str));
1079 flog_err(EC_ZEBRA_DP_INSTALL_FAIL,
1080 "%u:%s: Failed to enqueue dataplane install",
1081 re->vrf_id, str);
1082 break;
1083 }
1084 case ZEBRA_DPLANE_REQUEST_SUCCESS:
1085 if (zvrf)
1086 zvrf->installs++;
1087 break;
1088 }
1089
1090 return;
1091 }
1092
1093 /* Uninstall the route from kernel. */
1094 void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re)
1095 {
1096 struct nexthop *nexthop;
1097 rib_table_info_t *info = srcdest_rnode_table_info(rn);
1098 struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
1099
1100 if (info->safi != SAFI_UNICAST) {
1101 UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
1102 for (ALL_NEXTHOPS(re->ng, nexthop))
1103 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
1104 return;
1105 }
1106
1107 /*
1108 * Make sure we update the FPM any time we send new information to
1109 * the dataplane.
1110 */
1111 hook_call(rib_update, rn, "uninstalling from kernel");
1112
1113 switch (dplane_route_delete(rn, re)) {
1114 case ZEBRA_DPLANE_REQUEST_QUEUED:
1115 if (zvrf)
1116 zvrf->removals_queued++;
1117 break;
1118 case ZEBRA_DPLANE_REQUEST_FAILURE:
1119 {
1120 char str[SRCDEST2STR_BUFFER];
1121
1122 srcdest_rnode2str(rn, str, sizeof(str));
1123 flog_err(EC_ZEBRA_DP_INSTALL_FAIL,
1124 "%u:%s: Failed to enqueue dataplane uninstall",
1125 re->vrf_id, str);
1126 break;
1127 }
1128 case ZEBRA_DPLANE_REQUEST_SUCCESS:
1129 if (zvrf)
1130 zvrf->removals++;
1131 break;
1132 }
1133
1134 return;
1135 }
1136
1137 /* Uninstall the route from kernel. */
1138 static void rib_uninstall(struct route_node *rn, struct route_entry *re)
1139 {
1140 rib_table_info_t *info = srcdest_rnode_table_info(rn);
1141 rib_dest_t *dest = rib_dest_from_rnode(rn);
1142 struct nexthop *nexthop;
1143
1144 if (dest && dest->selected_fib == re) {
1145 if (info->safi == SAFI_UNICAST)
1146 hook_call(rib_update, rn, "rib_uninstall");
1147
1148 /* If labeled-unicast route, uninstall transit LSP. */
1149 if (zebra_rib_labeled_unicast(re))
1150 zebra_mpls_lsp_uninstall(info->zvrf, rn, re);
1151
1152 rib_uninstall_kernel(rn, re);
1153
1154 dest->selected_fib = NULL;
1155
1156 for (ALL_NEXTHOPS(re->ng, nexthop))
1157 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
1158 }
1159
1160 if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) {
1161 const struct prefix *p, *src_p;
1162
1163 srcdest_rnode_prefixes(rn, &p, &src_p);
1164
1165 redistribute_delete(p, src_p, re);
1166 UNSET_FLAG(re->flags, ZEBRA_FLAG_SELECTED);
1167 }
1168 }
1169
1170 /*
1171 * rib_can_delete_dest
1172 *
1173 * Returns TRUE if the given dest can be deleted from the table.
1174 */
1175 static int rib_can_delete_dest(rib_dest_t *dest)
1176 {
1177 if (re_list_first(&dest->routes)) {
1178 return 0;
1179 }
1180
1181 /*
1182 * Unresolved rnh's are stored on the default route's list
1183 *
1184 * dest->rnode can also be the source prefix node in an
1185 * ipv6 sourcedest table. Fortunately the prefix of a
1186 * source prefix node can never be the default prefix.
1187 */
1188 if (is_default_prefix(&dest->rnode->p))
1189 return 0;
1190
1191 /*
1192 * Don't delete the dest if we have to update the FPM about this
1193 * prefix.
1194 */
1195 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)
1196 || CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM))
1197 return 0;
1198
1199 return 1;
1200 }
1201
1202 void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq)
1203 {
1204 rib_dest_t *dest = rib_dest_from_rnode(rn);
1205 struct rnh *rnh;
1206
1207 /*
1208 * We are storing the rnh's associated withb
1209 * the tracked nexthop as a list of the rn's.
1210 * Unresolved rnh's are placed at the top
1211 * of the tree list.( 0.0.0.0/0 for v4 and 0::0/0 for v6 )
1212 * As such for each rn we need to walk up the tree
1213 * and see if any rnh's need to see if they
1214 * would match a more specific route
1215 */
1216 while (rn) {
1217 if (IS_ZEBRA_DEBUG_NHT_DETAILED) {
1218 char buf[PREFIX_STRLEN];
1219
1220 zlog_debug("%s: %s Being examined for Nexthop Tracking",
1221 __PRETTY_FUNCTION__,
1222 srcdest_rnode2str(rn, buf, sizeof(buf)));
1223 }
1224 if (!dest) {
1225 rn = rn->parent;
1226 if (rn)
1227 dest = rib_dest_from_rnode(rn);
1228 continue;
1229 }
1230 /*
1231 * If we have any rnh's stored in the nht list
1232 * then we know that this route node was used for
1233 * nht resolution and as such we need to call the
1234 * nexthop tracking evaluation code
1235 */
1236 for_each (rnh_list, &dest->nht, rnh) {
1237 struct zebra_vrf *zvrf =
1238 zebra_vrf_lookup_by_id(rnh->vrf_id);
1239 struct prefix *p = &rnh->node->p;
1240
1241 if (IS_ZEBRA_DEBUG_NHT_DETAILED) {
1242 char buf1[PREFIX_STRLEN];
1243 char buf2[PREFIX_STRLEN];
1244
1245 zlog_debug("%u:%s has Nexthop(%s) depending on it, evaluating %u:%u",
1246 zvrf->vrf->vrf_id,
1247 srcdest_rnode2str(rn, buf1,
1248 sizeof(buf1)),
1249 prefix2str(p, buf2, sizeof(buf2)),
1250 seq, rnh->seqno);
1251 }
1252
1253 /*
1254 * If we have evaluated this node on this pass
1255 * already, due to following the tree up
1256 * then we know that we can move onto the next
1257 * rnh to process.
1258 *
1259 * Additionally we call zebra_evaluate_rnh
1260 * when we gc the dest. In this case we know
1261 * that there must be no other re's where
1262 * we were originally as such we know that
1263 * that sequence number is ok to respect.
1264 */
1265 if (rnh->seqno == seq) {
1266 if (IS_ZEBRA_DEBUG_NHT_DETAILED)
1267 zlog_debug(
1268 "\tNode processed and moved already");
1269 continue;
1270 }
1271
1272 rnh->seqno = seq;
1273 zebra_evaluate_rnh(zvrf, family2afi(p->family), 0,
1274 rnh->type, p);
1275 }
1276
1277 rn = rn->parent;
1278 if (rn)
1279 dest = rib_dest_from_rnode(rn);
1280 }
1281 }
1282
1283 /*
1284 * rib_gc_dest
1285 *
1286 * Garbage collect the rib dest corresponding to the given route node
1287 * if appropriate.
1288 *
1289 * Returns TRUE if the dest was deleted, FALSE otherwise.
1290 */
1291 int rib_gc_dest(struct route_node *rn)
1292 {
1293 rib_dest_t *dest;
1294
1295 dest = rib_dest_from_rnode(rn);
1296 if (!dest)
1297 return 0;
1298
1299 if (!rib_can_delete_dest(dest))
1300 return 0;
1301
1302 if (IS_ZEBRA_DEBUG_RIB) {
1303 struct zebra_vrf *zvrf;
1304
1305 zvrf = rib_dest_vrf(dest);
1306 rnode_debug(rn, zvrf_id(zvrf), "removing dest from table");
1307 }
1308
1309 zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence());
1310
1311 dest->rnode = NULL;
1312 rnh_list_fini(&dest->nht);
1313 XFREE(MTYPE_RIB_DEST, dest);
1314 rn->info = NULL;
1315
1316 /*
1317 * Release the one reference that we keep on the route node.
1318 */
1319 route_unlock_node(rn);
1320 return 1;
1321 }
1322
1323 static void rib_process_add_fib(struct zebra_vrf *zvrf, struct route_node *rn,
1324 struct route_entry *new)
1325 {
1326 hook_call(rib_update, rn, "new route selected");
1327
1328 /* Update real nexthop. This may actually determine if nexthop is active
1329 * or not. */
1330 if (!nexthop_group_active_nexthop_num(&new->ng)) {
1331 UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED);
1332 return;
1333 }
1334
1335 if (IS_ZEBRA_DEBUG_RIB) {
1336 char buf[SRCDEST2STR_BUFFER];
1337 srcdest_rnode2str(rn, buf, sizeof(buf));
1338 zlog_debug("%u:%s: Adding route rn %p, re %p (%s)",
1339 zvrf_id(zvrf), buf, rn, new,
1340 zebra_route_string(new->type));
1341 }
1342
1343 /* If labeled-unicast route, install transit LSP. */
1344 if (zebra_rib_labeled_unicast(new))
1345 zebra_mpls_lsp_install(zvrf, rn, new);
1346
1347 rib_install_kernel(rn, new, NULL);
1348
1349 UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED);
1350 }
1351
1352 static void rib_process_del_fib(struct zebra_vrf *zvrf, struct route_node *rn,
1353 struct route_entry *old)
1354 {
1355 hook_call(rib_update, rn, "removing existing route");
1356
1357 /* Uninstall from kernel. */
1358 if (IS_ZEBRA_DEBUG_RIB) {
1359 char buf[SRCDEST2STR_BUFFER];
1360 srcdest_rnode2str(rn, buf, sizeof(buf));
1361 zlog_debug("%u:%s: Deleting route rn %p, re %p (%s)",
1362 zvrf_id(zvrf), buf, rn, old,
1363 zebra_route_string(old->type));
1364 }
1365
1366 /* If labeled-unicast route, uninstall transit LSP. */
1367 if (zebra_rib_labeled_unicast(old))
1368 zebra_mpls_lsp_uninstall(zvrf, rn, old);
1369
1370 rib_uninstall_kernel(rn, old);
1371
1372 /* Update nexthop for route, reset changed flag. */
1373 /* Note: this code also handles the Linux case when an interface goes
1374 * down, causing the kernel to delete routes without sending DELROUTE
1375 * notifications
1376 */
1377 if (RIB_KERNEL_ROUTE(old))
1378 SET_FLAG(old->status, ROUTE_ENTRY_REMOVED);
1379 else
1380 UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED);
1381 }
1382
1383 static void rib_process_update_fib(struct zebra_vrf *zvrf,
1384 struct route_node *rn,
1385 struct route_entry *old,
1386 struct route_entry *new)
1387 {
1388 int nh_active = 0;
1389
1390 /*
1391 * We have to install or update if a new route has been selected or
1392 * something has changed.
1393 */
1394 if (new != old || CHECK_FLAG(new->status, ROUTE_ENTRY_CHANGED)) {
1395 hook_call(rib_update, rn, "updating existing route");
1396
1397 /* Update the nexthop; we could determine here that nexthop is
1398 * inactive. */
1399 if (nexthop_group_active_nexthop_num(&new->ng))
1400 nh_active = 1;
1401
1402 /* If nexthop is active, install the selected route, if
1403 * appropriate. If
1404 * the install succeeds, cleanup flags for prior route, if
1405 * different from
1406 * newly selected.
1407 */
1408 if (nh_active) {
1409 if (IS_ZEBRA_DEBUG_RIB) {
1410 char buf[SRCDEST2STR_BUFFER];
1411 srcdest_rnode2str(rn, buf, sizeof(buf));
1412 if (new != old)
1413 zlog_debug(
1414 "%u:%s: Updating route rn %p, re %p (%s) old %p (%s)",
1415 zvrf_id(zvrf), buf, rn, new,
1416 zebra_route_string(new->type),
1417 old,
1418 zebra_route_string(old->type));
1419 else
1420 zlog_debug(
1421 "%u:%s: Updating route rn %p, re %p (%s)",
1422 zvrf_id(zvrf), buf, rn, new,
1423 zebra_route_string(new->type));
1424 }
1425
1426 /* If labeled-unicast route, uninstall transit LSP. */
1427 if (zebra_rib_labeled_unicast(old))
1428 zebra_mpls_lsp_uninstall(zvrf, rn, old);
1429
1430 /*
1431 * Non-system route should be installed.
1432 * If labeled-unicast route, install transit
1433 * LSP.
1434 */
1435 if (zebra_rib_labeled_unicast(new))
1436 zebra_mpls_lsp_install(zvrf, rn, new);
1437
1438 rib_install_kernel(rn, new, old);
1439 }
1440
1441 /*
1442 * If nexthop for selected route is not active or install
1443 * failed, we
1444 * may need to uninstall and delete for redistribution.
1445 */
1446 if (!nh_active) {
1447 if (IS_ZEBRA_DEBUG_RIB) {
1448 char buf[SRCDEST2STR_BUFFER];
1449 srcdest_rnode2str(rn, buf, sizeof(buf));
1450 if (new != old)
1451 zlog_debug(
1452 "%u:%s: Deleting route rn %p, re %p (%s) old %p (%s) - nexthop inactive",
1453 zvrf_id(zvrf), buf, rn, new,
1454 zebra_route_string(new->type),
1455 old,
1456 zebra_route_string(old->type));
1457 else
1458 zlog_debug(
1459 "%u:%s: Deleting route rn %p, re %p (%s) - nexthop inactive",
1460 zvrf_id(zvrf), buf, rn, new,
1461 zebra_route_string(new->type));
1462 }
1463
1464 /* If labeled-unicast route, uninstall transit LSP. */
1465 if (zebra_rib_labeled_unicast(old))
1466 zebra_mpls_lsp_uninstall(zvrf, rn, old);
1467
1468 rib_uninstall_kernel(rn, old);
1469 }
1470 } else {
1471 /*
1472 * Same route selected; check if in the FIB and if not,
1473 * re-install. This is housekeeping code to deal with
1474 * race conditions in kernel with linux netlink reporting
1475 * interface up before IPv4 or IPv6 protocol is ready
1476 * to add routes.
1477 */
1478 if (!CHECK_FLAG(new->status, ROUTE_ENTRY_INSTALLED) ||
1479 RIB_SYSTEM_ROUTE(new))
1480 rib_install_kernel(rn, new, NULL);
1481 }
1482
1483 /* Update prior route. */
1484 if (new != old)
1485 UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED);
1486
1487 /* Clear changed flag. */
1488 UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED);
1489 }
1490
1491 /* Check if 'alternate' RIB entry is better than 'current'. */
1492 static struct route_entry *rib_choose_best(struct route_entry *current,
1493 struct route_entry *alternate)
1494 {
1495 if (current == NULL)
1496 return alternate;
1497
1498 /* filter route selection in following order:
1499 * - connected beats other types
1500 * - if both connected, loopback or vrf wins
1501 * - lower distance beats higher
1502 * - lower metric beats higher for equal distance
1503 * - last, hence oldest, route wins tie break.
1504 */
1505
1506 /* Connected routes. Check to see if either are a vrf
1507 * or loopback interface. If not, pick the last connected
1508 * route of the set of lowest metric connected routes.
1509 */
1510 if (alternate->type == ZEBRA_ROUTE_CONNECT) {
1511 if (current->type != ZEBRA_ROUTE_CONNECT)
1512 return alternate;
1513
1514 /* both are connected. are either loop or vrf? */
1515 struct nexthop *nexthop = NULL;
1516
1517 for (ALL_NEXTHOPS(alternate->ng, nexthop)) {
1518 if (if_is_loopback_or_vrf(if_lookup_by_index(
1519 nexthop->ifindex, alternate->vrf_id)))
1520 return alternate;
1521 }
1522
1523 for (ALL_NEXTHOPS(current->ng, nexthop)) {
1524 if (if_is_loopback_or_vrf(if_lookup_by_index(
1525 nexthop->ifindex, current->vrf_id)))
1526 return current;
1527 }
1528
1529 /* Neither are loop or vrf so pick best metric */
1530 if (alternate->metric <= current->metric)
1531 return alternate;
1532
1533 return current;
1534 }
1535
1536 if (current->type == ZEBRA_ROUTE_CONNECT)
1537 return current;
1538
1539 /* higher distance loses */
1540 if (alternate->distance < current->distance)
1541 return alternate;
1542 if (current->distance < alternate->distance)
1543 return current;
1544
1545 /* metric tie-breaks equal distance */
1546 if (alternate->metric <= current->metric)
1547 return alternate;
1548
1549 return current;
1550 }
1551
1552 /* Core function for processing routing information base. */
1553 static void rib_process(struct route_node *rn)
1554 {
1555 struct route_entry *re;
1556 struct route_entry *next;
1557 struct route_entry *old_selected = NULL;
1558 struct route_entry *new_selected = NULL;
1559 struct route_entry *old_fib = NULL;
1560 struct route_entry *new_fib = NULL;
1561 struct route_entry *best = NULL;
1562 char buf[SRCDEST2STR_BUFFER];
1563 rib_dest_t *dest;
1564 struct zebra_vrf *zvrf = NULL;
1565 const struct prefix *p, *src_p;
1566
1567 srcdest_rnode_prefixes(rn, &p, &src_p);
1568 vrf_id_t vrf_id = VRF_UNKNOWN;
1569
1570 assert(rn);
1571
1572 dest = rib_dest_from_rnode(rn);
1573 if (dest) {
1574 zvrf = rib_dest_vrf(dest);
1575 vrf_id = zvrf_id(zvrf);
1576 }
1577
1578 if (IS_ZEBRA_DEBUG_RIB)
1579 srcdest_rnode2str(rn, buf, sizeof(buf));
1580
1581 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
1582 zlog_debug("%u:%s: Processing rn %p", vrf_id, buf, rn);
1583
1584 /*
1585 * we can have rn's that have a NULL info pointer
1586 * (dest). As such let's not let the deref happen
1587 * additionally we know RNODE_FOREACH_RE_SAFE
1588 * will not iterate so we are ok.
1589 */
1590 if (dest)
1591 old_fib = dest->selected_fib;
1592
1593 RNODE_FOREACH_RE_SAFE (rn, re, next) {
1594 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
1595 zlog_debug(
1596 "%u:%s: Examine re %p (%s) status %x flags %x dist %d metric %d",
1597 vrf_id, buf, re, zebra_route_string(re->type),
1598 re->status, re->flags, re->distance,
1599 re->metric);
1600
1601 UNSET_FLAG(re->status, ROUTE_ENTRY_NEXTHOPS_CHANGED);
1602
1603 /* Currently selected re. */
1604 if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) {
1605 assert(old_selected == NULL);
1606 old_selected = re;
1607 }
1608
1609 /* Skip deleted entries from selection */
1610 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
1611 continue;
1612
1613 /* Skip unreachable nexthop. */
1614 /* This first call to nexthop_active_update is merely to
1615 * determine if there's any change to nexthops associated
1616 * with this RIB entry. Now, rib_process() can be invoked due
1617 * to an external event such as link down or due to
1618 * next-hop-tracking evaluation. In the latter case,
1619 * a decision has already been made that the NHs have changed.
1620 * So, no need to invoke a potentially expensive call again.
1621 * Further, since the change might be in a recursive NH which
1622 * is not caught in the nexthop_active_update() code. Thus, we
1623 * might miss changes to recursive NHs.
1624 */
1625 if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)
1626 && !nexthop_active_update(rn, re)) {
1627 if (re->type == ZEBRA_ROUTE_TABLE) {
1628 /* XXX: HERE BE DRAGONS!!!!!
1629 * In all honesty, I have not yet figured out
1630 * what this part does or why the
1631 * ROUTE_ENTRY_CHANGED test above is correct
1632 * or why we need to delete a route here, and
1633 * also not whether this concerns both selected
1634 * and fib route, or only selected
1635 * or only fib
1636 *
1637 * This entry was denied by the 'ip protocol
1638 * table' route-map, we need to delete it */
1639 if (re != old_selected) {
1640 if (IS_ZEBRA_DEBUG_RIB)
1641 zlog_debug(
1642 "%s: %u:%s: imported via import-table but denied "
1643 "by the ip protocol table route-map",
1644 __func__, vrf_id, buf);
1645 rib_unlink(rn, re);
1646 } else
1647 SET_FLAG(re->status,
1648 ROUTE_ENTRY_REMOVED);
1649 }
1650
1651 continue;
1652 }
1653
1654 /* Infinite distance. */
1655 if (re->distance == DISTANCE_INFINITY) {
1656 UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
1657 continue;
1658 }
1659
1660 if (CHECK_FLAG(re->flags, ZEBRA_FLAG_FIB_OVERRIDE)) {
1661 best = rib_choose_best(new_fib, re);
1662 if (new_fib && best != new_fib)
1663 UNSET_FLAG(new_fib->status,
1664 ROUTE_ENTRY_CHANGED);
1665 new_fib = best;
1666 } else {
1667 best = rib_choose_best(new_selected, re);
1668 if (new_selected && best != new_selected)
1669 UNSET_FLAG(new_selected->status,
1670 ROUTE_ENTRY_CHANGED);
1671 new_selected = best;
1672 }
1673 if (best != re)
1674 UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
1675 } /* RNODE_FOREACH_RE */
1676
1677 /* If no FIB override route, use the selected route also for FIB */
1678 if (new_fib == NULL)
1679 new_fib = new_selected;
1680
1681 /* After the cycle is finished, the following pointers will be set:
1682 * old_selected --- RE entry currently having SELECTED
1683 * new_selected --- RE entry that is newly SELECTED
1684 * old_fib --- RE entry currently in kernel FIB
1685 * new_fib --- RE entry that is newly to be in kernel FIB
1686 *
1687 * new_selected will get SELECTED flag, and is going to be redistributed
1688 * the zclients. new_fib (which can be new_selected) will be installed
1689 * in kernel.
1690 */
1691
1692 if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
1693 zlog_debug(
1694 "%u:%s: After processing: old_selected %p new_selected %p old_fib %p new_fib %p",
1695 vrf_id, buf, (void *)old_selected, (void *)new_selected,
1696 (void *)old_fib, (void *)new_fib);
1697 }
1698
1699 /* Buffer ROUTE_ENTRY_CHANGED here, because it will get cleared if
1700 * fib == selected */
1701 bool selected_changed = new_selected && CHECK_FLAG(new_selected->status,
1702 ROUTE_ENTRY_CHANGED);
1703
1704 /* Update fib according to selection results */
1705 if (new_fib && old_fib)
1706 rib_process_update_fib(zvrf, rn, old_fib, new_fib);
1707 else if (new_fib)
1708 rib_process_add_fib(zvrf, rn, new_fib);
1709 else if (old_fib)
1710 rib_process_del_fib(zvrf, rn, old_fib);
1711
1712 /* Update SELECTED entry */
1713 if (old_selected != new_selected || selected_changed) {
1714
1715 if (new_selected && new_selected != new_fib)
1716 UNSET_FLAG(new_selected->status, ROUTE_ENTRY_CHANGED);
1717
1718 if (new_selected)
1719 SET_FLAG(new_selected->flags, ZEBRA_FLAG_SELECTED);
1720
1721 if (old_selected) {
1722 if (!new_selected)
1723 redistribute_delete(p, src_p, old_selected);
1724 if (old_selected != new_selected)
1725 UNSET_FLAG(old_selected->flags,
1726 ZEBRA_FLAG_SELECTED);
1727 }
1728 }
1729
1730 /* Remove all RE entries queued for removal */
1731 RNODE_FOREACH_RE_SAFE (rn, re, next) {
1732 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) {
1733 if (IS_ZEBRA_DEBUG_RIB) {
1734 rnode_debug(rn, vrf_id, "rn %p, removing re %p",
1735 (void *)rn, (void *)re);
1736 }
1737 rib_unlink(rn, re);
1738 }
1739 }
1740
1741 /*
1742 * Check if the dest can be deleted now.
1743 */
1744 rib_gc_dest(rn);
1745 }
1746
1747 static void zebra_rib_evaluate_mpls(struct route_node *rn)
1748 {
1749 rib_dest_t *dest = rib_dest_from_rnode(rn);
1750 struct zebra_vrf *zvrf = vrf_info_lookup(VRF_DEFAULT);
1751
1752 if (!dest)
1753 return;
1754
1755 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_LSPS)) {
1756 if (IS_ZEBRA_DEBUG_MPLS)
1757 zlog_debug(
1758 "%u: Scheduling all LSPs upon RIB completion",
1759 zvrf_id(zvrf));
1760 zebra_mpls_lsp_schedule(zvrf);
1761 mpls_unmark_lsps_for_processing(rn);
1762 }
1763 }
1764
1765 /*
1766 * Utility to match route with dplane context data
1767 */
1768 static bool rib_route_match_ctx(const struct route_entry *re,
1769 const struct zebra_dplane_ctx *ctx,
1770 bool is_update)
1771 {
1772 bool result = false;
1773
1774 if (is_update) {
1775 /*
1776 * In 'update' case, we test info about the 'previous' or
1777 * 'old' route
1778 */
1779 if ((re->type == dplane_ctx_get_old_type(ctx)) &&
1780 (re->instance == dplane_ctx_get_old_instance(ctx))) {
1781 result = true;
1782
1783 /* TODO -- we're using this extra test, but it's not
1784 * exactly clear why.
1785 */
1786 if (re->type == ZEBRA_ROUTE_STATIC &&
1787 (re->distance != dplane_ctx_get_old_distance(ctx) ||
1788 re->tag != dplane_ctx_get_old_tag(ctx))) {
1789 result = false;
1790 }
1791 }
1792
1793 } else {
1794 /*
1795 * Ordinary, single-route case using primary context info
1796 */
1797 if ((dplane_ctx_get_op(ctx) != DPLANE_OP_ROUTE_DELETE) &&
1798 CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) {
1799 /* Skip route that's been deleted */
1800 goto done;
1801 }
1802
1803 if ((re->type == dplane_ctx_get_type(ctx)) &&
1804 (re->instance == dplane_ctx_get_instance(ctx))) {
1805 result = true;
1806
1807 /* TODO -- we're using this extra test, but it's not
1808 * exactly clear why.
1809 */
1810 if (re->type == ZEBRA_ROUTE_STATIC &&
1811 (re->distance != dplane_ctx_get_distance(ctx) ||
1812 re->tag != dplane_ctx_get_tag(ctx))) {
1813 result = false;
1814 }
1815 }
1816 }
1817
1818 done:
1819
1820 return (result);
1821 }
1822
1823 static void zebra_rib_fixup_system(struct route_node *rn)
1824 {
1825 struct route_entry *re;
1826
1827 RNODE_FOREACH_RE(rn, re) {
1828 struct nexthop *nhop;
1829
1830 if (!RIB_SYSTEM_ROUTE(re))
1831 continue;
1832
1833 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
1834 continue;
1835
1836 SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
1837
1838 for (ALL_NEXTHOPS(re->ng, nhop)) {
1839 if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE))
1840 continue;
1841
1842 SET_FLAG(nhop->flags, NEXTHOP_FLAG_FIB);
1843 }
1844 }
1845 }
1846
1847 /*
1848 * Route-update results processing after async dataplane update.
1849 */
1850 static void rib_process_result(struct zebra_dplane_ctx *ctx)
1851 {
1852 struct route_table *table = NULL;
1853 struct zebra_vrf *zvrf = NULL;
1854 struct route_node *rn = NULL;
1855 struct route_entry *re = NULL, *old_re = NULL, *rib;
1856 bool is_update = false;
1857 struct nexthop *nexthop, *ctx_nexthop;
1858 char dest_str[PREFIX_STRLEN] = "";
1859 enum dplane_op_e op;
1860 enum zebra_dplane_result status;
1861 const struct prefix *dest_pfx, *src_pfx;
1862 uint32_t seq;
1863
1864 /* Locate rn and re(s) from ctx */
1865
1866 table = zebra_vrf_table_with_table_id(dplane_ctx_get_afi(ctx),
1867 dplane_ctx_get_safi(ctx),
1868 dplane_ctx_get_vrf(ctx),
1869 dplane_ctx_get_table(ctx));
1870 if (table == NULL) {
1871 if (IS_ZEBRA_DEBUG_DPLANE) {
1872 zlog_debug("Failed to process dplane results: no table for afi %d, safi %d, vrf %u",
1873 dplane_ctx_get_afi(ctx),
1874 dplane_ctx_get_safi(ctx),
1875 dplane_ctx_get_vrf(ctx));
1876 }
1877 goto done;
1878 }
1879
1880 zvrf = vrf_info_lookup(dplane_ctx_get_vrf(ctx));
1881
1882 dest_pfx = dplane_ctx_get_dest(ctx);
1883
1884 /* Note well: only capturing the prefix string if debug is enabled here;
1885 * unconditional log messages will have to generate the string.
1886 */
1887 if (IS_ZEBRA_DEBUG_DPLANE)
1888 prefix2str(dest_pfx, dest_str, sizeof(dest_str));
1889
1890 src_pfx = dplane_ctx_get_src(ctx);
1891 rn = srcdest_rnode_get(table, dplane_ctx_get_dest(ctx),
1892 src_pfx ? (struct prefix_ipv6 *)src_pfx : NULL);
1893 if (rn == NULL) {
1894 if (IS_ZEBRA_DEBUG_DPLANE) {
1895 zlog_debug("Failed to process dplane results: no route for %u:%s",
1896 dplane_ctx_get_vrf(ctx), dest_str);
1897 }
1898 goto done;
1899 }
1900
1901 srcdest_rnode_prefixes(rn, &dest_pfx, &src_pfx);
1902
1903 op = dplane_ctx_get_op(ctx);
1904 status = dplane_ctx_get_status(ctx);
1905
1906 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
1907 zlog_debug("%u:%s Processing dplane ctx %p, op %s result %s",
1908 dplane_ctx_get_vrf(ctx), dest_str, ctx,
1909 dplane_op2str(op), dplane_res2str(status));
1910
1911 /*
1912 * Update is a bit of a special case, where we may have both old and new
1913 * routes to post-process.
1914 */
1915 is_update = dplane_ctx_is_update(ctx);
1916
1917 /*
1918 * Take a pass through the routes, look for matches with the context
1919 * info.
1920 */
1921 RNODE_FOREACH_RE(rn, rib) {
1922
1923 if (re == NULL) {
1924 if (rib_route_match_ctx(rib, ctx, false))
1925 re = rib;
1926 }
1927
1928 /* Check for old route match */
1929 if (is_update && (old_re == NULL)) {
1930 if (rib_route_match_ctx(rib, ctx, true /*is_update*/))
1931 old_re = rib;
1932 }
1933
1934 /* Have we found the routes we need to work on? */
1935 if (re && ((!is_update || old_re)))
1936 break;
1937 }
1938
1939 seq = dplane_ctx_get_seq(ctx);
1940
1941 /*
1942 * Check sequence number(s) to detect stale results before continuing
1943 */
1944 if (re) {
1945 if (re->dplane_sequence != seq) {
1946 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
1947 zlog_debug("%u:%s Stale dplane result for re %p",
1948 dplane_ctx_get_vrf(ctx),
1949 dest_str, re);
1950 } else
1951 UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED);
1952 }
1953
1954 if (old_re) {
1955 if (old_re->dplane_sequence != dplane_ctx_get_old_seq(ctx)) {
1956 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
1957 zlog_debug("%u:%s Stale dplane result for old_re %p",
1958 dplane_ctx_get_vrf(ctx),
1959 dest_str, old_re);
1960 } else
1961 UNSET_FLAG(old_re->status, ROUTE_ENTRY_QUEUED);
1962 }
1963
1964 switch (op) {
1965 case DPLANE_OP_ROUTE_INSTALL:
1966 case DPLANE_OP_ROUTE_UPDATE:
1967 if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) {
1968 if (re) {
1969 UNSET_FLAG(re->status, ROUTE_ENTRY_FAILED);
1970 SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
1971 }
1972 /*
1973 * On an update operation from the same route type
1974 * context retrieval currently has no way to know
1975 * which was the old and which was the new.
1976 * So don't unset our flags that we just set.
1977 * We know redistribution is ok because the
1978 * old_re in this case is used for nothing
1979 * more than knowing whom to contact if necessary.
1980 */
1981 if (old_re && old_re != re) {
1982 UNSET_FLAG(old_re->status, ROUTE_ENTRY_FAILED);
1983 UNSET_FLAG(old_re->status,
1984 ROUTE_ENTRY_INSTALLED);
1985 }
1986 /* Update zebra nexthop FIB flag for each
1987 * nexthop that was installed.
1988 */
1989 for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
1990 ctx_nexthop)) {
1991
1992 if (!re)
1993 continue;
1994
1995 for (ALL_NEXTHOPS(re->ng, nexthop)) {
1996 if (nexthop_same(ctx_nexthop, nexthop))
1997 break;
1998 }
1999
2000 if (nexthop == NULL)
2001 continue;
2002
2003 if (CHECK_FLAG(nexthop->flags,
2004 NEXTHOP_FLAG_RECURSIVE))
2005 continue;
2006
2007 if (CHECK_FLAG(ctx_nexthop->flags,
2008 NEXTHOP_FLAG_FIB))
2009 SET_FLAG(nexthop->flags,
2010 NEXTHOP_FLAG_FIB);
2011 else
2012 UNSET_FLAG(nexthop->flags,
2013 NEXTHOP_FLAG_FIB);
2014 }
2015
2016 /*
2017 * System routes are weird in that they
2018 * allow multiple to be installed that match
2019 * to the same prefix, so after we get the
2020 * result we need to clean them up so that
2021 * we can actually use them.
2022 */
2023 if ((re && RIB_SYSTEM_ROUTE(re)) ||
2024 (old_re && RIB_SYSTEM_ROUTE(old_re)))
2025 zebra_rib_fixup_system(rn);
2026
2027 if (zvrf)
2028 zvrf->installs++;
2029
2030 /* Redistribute */
2031 /*
2032 * TODO -- still calling the redist api using the
2033 * route_entries, and there's a corner-case here:
2034 * if there's no client for the 'new' route, a redist
2035 * deleting the 'old' route will be sent. But if the
2036 * 'old' context info was stale, 'old_re' will be
2037 * NULL here and that delete will not be sent.
2038 */
2039 if (re)
2040 redistribute_update(dest_pfx, src_pfx,
2041 re, old_re);
2042
2043 /* Notify route owner */
2044 zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_INSTALLED);
2045
2046 } else {
2047 if (re) {
2048 SET_FLAG(re->status, ROUTE_ENTRY_FAILED);
2049 UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
2050 } if (old_re)
2051 SET_FLAG(old_re->status, ROUTE_ENTRY_FAILED);
2052 if (re)
2053 zsend_route_notify_owner(re, dest_pfx,
2054 ZAPI_ROUTE_FAIL_INSTALL);
2055
2056 zlog_warn("%u:%s: Route install failed",
2057 dplane_ctx_get_vrf(ctx),
2058 prefix2str(dest_pfx,
2059 dest_str, sizeof(dest_str)));
2060 }
2061 break;
2062 case DPLANE_OP_ROUTE_DELETE:
2063 if (re)
2064 SET_FLAG(re->status, ROUTE_ENTRY_FAILED);
2065 /*
2066 * In the delete case, the zebra core datastructs were
2067 * updated (or removed) at the time the delete was issued,
2068 * so we're just notifying the route owner.
2069 */
2070 if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) {
2071 if (re) {
2072 UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
2073 UNSET_FLAG(re->status, ROUTE_ENTRY_FAILED);
2074 }
2075 zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVED);
2076
2077 if (zvrf)
2078 zvrf->removals++;
2079 } else {
2080 if (re)
2081 SET_FLAG(re->status, ROUTE_ENTRY_FAILED);
2082 zsend_route_notify_owner_ctx(ctx,
2083 ZAPI_ROUTE_REMOVE_FAIL);
2084
2085 zlog_warn("%u:%s: Route Deletion failure",
2086 dplane_ctx_get_vrf(ctx),
2087 prefix2str(dest_pfx,
2088 dest_str, sizeof(dest_str)));
2089 }
2090
2091 /*
2092 * System routes are weird in that they
2093 * allow multiple to be installed that match
2094 * to the same prefix, so after we get the
2095 * result we need to clean them up so that
2096 * we can actually use them.
2097 */
2098 if ((re && RIB_SYSTEM_ROUTE(re)) ||
2099 (old_re && RIB_SYSTEM_ROUTE(old_re)))
2100 zebra_rib_fixup_system(rn);
2101 break;
2102 default:
2103 break;
2104 }
2105
2106 zebra_rib_evaluate_rn_nexthops(rn, seq);
2107 zebra_rib_evaluate_mpls(rn);
2108 done:
2109
2110 if (rn)
2111 route_unlock_node(rn);
2112
2113 /* Return context to dataplane module */
2114 dplane_ctx_fini(&ctx);
2115 }
2116
2117 /* Take a list of route_node structs and return 1, if there was a record
2118 * picked from it and processed by rib_process(). Don't process more,
2119 * than one RN record; operate only in the specified sub-queue.
2120 */
2121 static unsigned int process_subq(struct list *subq, uint8_t qindex)
2122 {
2123 struct listnode *lnode = listhead(subq);
2124 struct route_node *rnode;
2125 rib_dest_t *dest;
2126 struct zebra_vrf *zvrf = NULL;
2127
2128 if (!lnode)
2129 return 0;
2130
2131 rnode = listgetdata(lnode);
2132 dest = rib_dest_from_rnode(rnode);
2133 if (dest)
2134 zvrf = rib_dest_vrf(dest);
2135
2136 rib_process(rnode);
2137
2138 if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
2139 char buf[SRCDEST2STR_BUFFER];
2140 srcdest_rnode2str(rnode, buf, sizeof(buf));
2141 zlog_debug("%u:%s: rn %p dequeued from sub-queue %u",
2142 zvrf ? zvrf_id(zvrf) : 0, buf, rnode, qindex);
2143 }
2144
2145 if (rnode->info)
2146 UNSET_FLAG(rib_dest_from_rnode(rnode)->flags,
2147 RIB_ROUTE_QUEUED(qindex));
2148
2149 #if 0
2150 else
2151 {
2152 zlog_debug ("%s: called for route_node (%p, %d) with no ribs",
2153 __func__, rnode, rnode->lock);
2154 zlog_backtrace(LOG_DEBUG);
2155 }
2156 #endif
2157 route_unlock_node(rnode);
2158 list_delete_node(subq, lnode);
2159 return 1;
2160 }
2161
2162
/*
 * Hook for next-hop tracking work to be done after RIB updates.
 * The function currently performs no work.
 */
static void do_nht_processing(void)
{
}
2169
2170 /* Dispatch the meta queue by picking, processing and unlocking the next RN from
2171 * a non-empty sub-queue with lowest priority. wq is equal to zebra->ribq and
2172 * data
2173 * is pointed to the meta queue structure.
2174 */
2175 static wq_item_status meta_queue_process(struct work_queue *dummy, void *data)
2176 {
2177 struct meta_queue *mq = data;
2178 unsigned i;
2179 uint32_t queue_len, queue_limit;
2180
2181 /* Ensure there's room for more dataplane updates */
2182 queue_limit = dplane_get_in_queue_limit();
2183 queue_len = dplane_get_in_queue_len();
2184 if (queue_len > queue_limit) {
2185 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2186 zlog_debug("rib queue: dplane queue len %u, limit %u, retrying",
2187 queue_len, queue_limit);
2188
2189 /* Ensure that the meta-queue is actually enqueued */
2190 if (work_queue_empty(zrouter.ribq))
2191 work_queue_add(zrouter.ribq, zrouter.mq);
2192
2193 return WQ_QUEUE_BLOCKED;
2194 }
2195
2196 for (i = 0; i < MQ_SIZE; i++)
2197 if (process_subq(mq->subq[i], i)) {
2198 mq->size--;
2199 break;
2200 }
2201 return mq->size ? WQ_REQUEUE : WQ_SUCCESS;
2202 }
2203
2204
2205 /*
2206 * Look into the RN and queue it into the highest priority queue
2207 * at this point in time for processing.
2208 *
2209 * We will enqueue a route node only once per invocation.
2210 *
2211 * There are two possibilities here that should be kept in mind.
2212 * If the original invocation has not been pulled off for processing
2213 * yet, A subsuquent invocation can have a route entry with a better
2214 * meta queue index value and we can have a situation where
2215 * we might have the same node enqueued 2 times. Not necessarily
2216 * an optimal situation but it should be ok.
2217 *
2218 * The other possibility is that the original invocation has not
2219 * been pulled off for processing yet, A subsusquent invocation
2220 * doesn't have a route_entry with a better meta-queue and the
2221 * original metaqueue index value will win and we'll end up with
2222 * the route node enqueued once.
2223 */
2224 static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn)
2225 {
2226 struct route_entry *re = NULL, *curr_re = NULL;
2227 uint8_t qindex = MQ_SIZE, curr_qindex = MQ_SIZE;
2228
2229 RNODE_FOREACH_RE (rn, curr_re) {
2230 curr_qindex = route_info[curr_re->type].meta_q_map;
2231
2232 if (curr_qindex <= qindex) {
2233 re = curr_re;
2234 qindex = curr_qindex;
2235 }
2236 }
2237
2238 if (!re)
2239 return;
2240
2241 /* Invariant: at this point we always have rn->info set. */
2242 if (CHECK_FLAG(rib_dest_from_rnode(rn)->flags,
2243 RIB_ROUTE_QUEUED(qindex))) {
2244 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2245 rnode_debug(rn, re->vrf_id,
2246 "rn %p is already queued in sub-queue %u",
2247 (void *)rn, qindex);
2248 return;
2249 }
2250
2251 SET_FLAG(rib_dest_from_rnode(rn)->flags, RIB_ROUTE_QUEUED(qindex));
2252 listnode_add(mq->subq[qindex], rn);
2253 route_lock_node(rn);
2254 mq->size++;
2255
2256 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2257 rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %u",
2258 (void *)rn, qindex);
2259 }
2260
2261 /* Add route_node to work queue and schedule processing */
2262 void rib_queue_add(struct route_node *rn)
2263 {
2264 assert(rn);
2265
2266 /* Pointless to queue a route_node with no RIB entries to add or remove
2267 */
2268 if (!rnode_to_ribs(rn)) {
2269 zlog_debug("%s: called for route_node (%p, %d) with no ribs",
2270 __func__, (void *)rn, rn->lock);
2271 zlog_backtrace(LOG_DEBUG);
2272 return;
2273 }
2274
2275 if (zrouter.ribq == NULL) {
2276 flog_err(EC_ZEBRA_WQ_NONEXISTENT,
2277 "%s: work_queue does not exist!", __func__);
2278 return;
2279 }
2280
2281 /*
2282 * The RIB queue should normally be either empty or holding the only
2283 * work_queue_item element. In the latter case this element would
2284 * hold a pointer to the meta queue structure, which must be used to
2285 * actually queue the route nodes to process. So create the MQ
2286 * holder, if necessary, then push the work into it in any case.
2287 * This semantics was introduced after 0.99.9 release.
2288 */
2289 if (work_queue_empty(zrouter.ribq))
2290 work_queue_add(zrouter.ribq, zrouter.mq);
2291
2292 rib_meta_queue_add(zrouter.mq, rn);
2293
2294 return;
2295 }
2296
2297 /* Create new meta queue.
2298 A destructor function doesn't seem to be necessary here.
2299 */
2300 static struct meta_queue *meta_queue_new(void)
2301 {
2302 struct meta_queue *new;
2303 unsigned i;
2304
2305 new = XCALLOC(MTYPE_WORK_QUEUE, sizeof(struct meta_queue));
2306
2307 for (i = 0; i < MQ_SIZE; i++) {
2308 new->subq[i] = list_new();
2309 assert(new->subq[i]);
2310 }
2311
2312 return new;
2313 }
2314
2315 void meta_queue_free(struct meta_queue *mq)
2316 {
2317 unsigned i;
2318
2319 for (i = 0; i < MQ_SIZE; i++)
2320 list_delete(&mq->subq[i]);
2321
2322 XFREE(MTYPE_WORK_QUEUE, mq);
2323 }
2324
2325 /* initialise zebra rib work queue */
2326 static void rib_queue_init(void)
2327 {
2328 if (!(zrouter.ribq = work_queue_new(zrouter.master,
2329 "route_node processing"))) {
2330 flog_err(EC_ZEBRA_WQ_NONEXISTENT,
2331 "%s: could not initialise work queue!", __func__);
2332 return;
2333 }
2334
2335 /* fill in the work queue spec */
2336 zrouter.ribq->spec.workfunc = &meta_queue_process;
2337 zrouter.ribq->spec.errorfunc = NULL;
2338 zrouter.ribq->spec.completion_func = NULL;
2339 /* XXX: TODO: These should be runtime configurable via vty */
2340 zrouter.ribq->spec.max_retries = 3;
2341 zrouter.ribq->spec.hold = ZEBRA_RIB_PROCESS_HOLD_TIME;
2342 zrouter.ribq->spec.retry = ZEBRA_RIB_PROCESS_RETRY_TIME;
2343
2344 if (!(zrouter.mq = meta_queue_new())) {
2345 flog_err(EC_ZEBRA_WQ_NONEXISTENT,
2346 "%s: could not initialise meta queue!", __func__);
2347 return;
2348 }
2349 return;
2350 }
2351
2352 rib_dest_t *zebra_rib_create_dest(struct route_node *rn)
2353 {
2354 rib_dest_t *dest;
2355
2356 dest = XCALLOC(MTYPE_RIB_DEST, sizeof(rib_dest_t));
2357 rnh_list_init(&dest->nht);
2358 route_lock_node(rn); /* rn route table reference */
2359 rn->info = dest;
2360 dest->rnode = rn;
2361
2362 return dest;
2363 }
2364
/* RIB updates are processed via a queue of pointers to route_nodes.
 *
 * The queue length is bounded by the maximal size of the routing table,
 * as a route_node will not be requeued, if already queued.
 *
 * REs are submitted via rib_addnode or rib_delnode which set minimal
 * state, or static_install_route (when an existing RE is updated)
 * and then submit route_node to queue for best-path selection later.
 * Order of add/delete state changes is preserved for any given RE.
 *
 * Deleted REs are reaped during best-path selection.
 *
 * rib_addnode
 * |-> rib_link or unset ROUTE_ENTRY_REMOVED     |-> Update kernel with
 *       |-------->|                             |   best RE, if required
 *                 |                             |
 * static_install->|->rib_queue_add..... -> rib_process
 *                 |                             |
 *       |-------->|                             |-> rib_unlink
 * |-> set ROUTE_ENTRY_REMOVED                          |
 * rib_delnode                                   (RE freed)
 *
 * The 'info' pointer of a route_node points to a rib_dest_t
 * ('dest'). Queueing state for a route_node is kept on the dest. The
 * dest is created on-demand by rib_link() and is kept around at least
 * as long as there are ribs hanging off it (@see rib_gc_dest()).
 *
 * Refcounting (aka "locking" throughout the GNU Zebra and Quagga code):
 *
 * - route_nodes: refcounted by:
 *   - dest attached to route_node:
 *     - managed by: rib_link/rib_gc_dest
 *   - route_node processing queue
 *     - managed by: rib_queue_add, rib_process.
 *
 */
2401
2402 /* Add RE to head of the route node. */
2403 static void rib_link(struct route_node *rn, struct route_entry *re, int process)
2404 {
2405 rib_dest_t *dest;
2406 afi_t afi;
2407 const char *rmap_name;
2408
2409 assert(re && rn);
2410
2411 dest = rib_dest_from_rnode(rn);
2412 if (!dest) {
2413 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2414 rnode_debug(rn, re->vrf_id, "rn %p adding dest", rn);
2415
2416 dest = zebra_rib_create_dest(rn);
2417 }
2418
2419 re_list_add_head(&dest->routes, re);
2420
2421 afi = (rn->p.family == AF_INET)
2422 ? AFI_IP
2423 : (rn->p.family == AF_INET6) ? AFI_IP6 : AFI_MAX;
2424 if (is_zebra_import_table_enabled(afi, re->table)) {
2425 rmap_name = zebra_get_import_table_route_map(afi, re->table);
2426 zebra_add_import_table_entry(rn, re, rmap_name);
2427 } else if (process)
2428 rib_queue_add(rn);
2429 }
2430
2431 static void rib_addnode(struct route_node *rn,
2432 struct route_entry *re, int process)
2433 {
2434 /* RE node has been un-removed before route-node is processed.
2435 * route_node must hence already be on the queue for processing..
2436 */
2437 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) {
2438 if (IS_ZEBRA_DEBUG_RIB)
2439 rnode_debug(rn, re->vrf_id, "rn %p, un-removed re %p",
2440 (void *)rn, (void *)re);
2441
2442 UNSET_FLAG(re->status, ROUTE_ENTRY_REMOVED);
2443 return;
2444 }
2445 rib_link(rn, re, process);
2446 }
2447
2448 /*
2449 * rib_unlink
2450 *
2451 * Detach a rib structure from a route_node.
2452 *
2453 * Note that a call to rib_unlink() should be followed by a call to
2454 * rib_gc_dest() at some point. This allows a rib_dest_t that is no
2455 * longer required to be deleted.
2456 */
2457 void rib_unlink(struct route_node *rn, struct route_entry *re)
2458 {
2459 rib_dest_t *dest;
2460
2461 assert(rn && re);
2462
2463 if (IS_ZEBRA_DEBUG_RIB)
2464 rnode_debug(rn, re->vrf_id, "rn %p, re %p", (void *)rn,
2465 (void *)re);
2466
2467 dest = rib_dest_from_rnode(rn);
2468
2469 re_list_del(&dest->routes, re);
2470
2471 if (dest->selected_fib == re)
2472 dest->selected_fib = NULL;
2473
2474 nexthops_free(re->ng.nexthop);
2475 XFREE(MTYPE_RE, re);
2476 }
2477
2478 void rib_delnode(struct route_node *rn, struct route_entry *re)
2479 {
2480 afi_t afi;
2481
2482 if (IS_ZEBRA_DEBUG_RIB)
2483 rnode_debug(rn, re->vrf_id, "rn %p, re %p, removing",
2484 (void *)rn, (void *)re);
2485 SET_FLAG(re->status, ROUTE_ENTRY_REMOVED);
2486
2487 afi = (rn->p.family == AF_INET)
2488 ? AFI_IP
2489 : (rn->p.family == AF_INET6) ? AFI_IP6 : AFI_MAX;
2490 if (is_zebra_import_table_enabled(afi, re->table)) {
2491 zebra_del_import_table_entry(rn, re);
2492 /* Just clean up if non main table */
2493 if (IS_ZEBRA_DEBUG_RIB) {
2494 char buf[SRCDEST2STR_BUFFER];
2495 srcdest_rnode2str(rn, buf, sizeof(buf));
2496 zlog_debug("%u:%s: Freeing route rn %p, re %p (%s)",
2497 re->vrf_id, buf, rn, re,
2498 zebra_route_string(re->type));
2499 }
2500
2501 rib_unlink(rn, re);
2502 } else {
2503 rib_queue_add(rn);
2504 }
2505 }
2506
2507 /* This function dumps the contents of a given RE entry into
2508 * standard debug log. Calling function name and IP prefix in
2509 * question are passed as 1st and 2nd arguments.
2510 */
2511
2512 void _route_entry_dump(const char *func, union prefixconstptr pp,
2513 union prefixconstptr src_pp,
2514 const struct route_entry *re)
2515 {
2516 const struct prefix *src_p = src_pp.p;
2517 bool is_srcdst = src_p && src_p->prefixlen;
2518 char straddr[PREFIX_STRLEN];
2519 char srcaddr[PREFIX_STRLEN];
2520 struct nexthop *nexthop;
2521
2522 zlog_debug("%s: dumping RE entry %p for %s%s%s vrf %u", func,
2523 (const void *)re, prefix2str(pp, straddr, sizeof(straddr)),
2524 is_srcdst ? " from " : "",
2525 is_srcdst ? prefix2str(src_pp, srcaddr, sizeof(srcaddr))
2526 : "",
2527 re->vrf_id);
2528 zlog_debug("%s: uptime == %lu, type == %u, instance == %d, table == %d",
2529 func, (unsigned long)re->uptime, re->type, re->instance,
2530 re->table);
2531 zlog_debug(
2532 "%s: metric == %u, mtu == %u, distance == %u, flags == %u, status == %u",
2533 func, re->metric, re->mtu, re->distance, re->flags, re->status);
2534 zlog_debug("%s: nexthop_num == %u, nexthop_active_num == %u", func,
2535 re->nexthop_num, re->nexthop_active_num);
2536
2537 for (ALL_NEXTHOPS(re->ng, nexthop)) {
2538 struct interface *ifp;
2539 struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id);
2540
2541 switch (nexthop->type) {
2542 case NEXTHOP_TYPE_BLACKHOLE:
2543 sprintf(straddr, "Blackhole");
2544 break;
2545 case NEXTHOP_TYPE_IFINDEX:
2546 ifp = if_lookup_by_index(nexthop->ifindex,
2547 nexthop->vrf_id);
2548 sprintf(straddr, "%s", ifp ? ifp->name : "Unknown");
2549 break;
2550 case NEXTHOP_TYPE_IPV4:
2551 /* fallthrough */
2552 case NEXTHOP_TYPE_IPV4_IFINDEX:
2553 inet_ntop(AF_INET, &nexthop->gate, straddr,
2554 INET6_ADDRSTRLEN);
2555 break;
2556 case NEXTHOP_TYPE_IPV6:
2557 case NEXTHOP_TYPE_IPV6_IFINDEX:
2558 inet_ntop(AF_INET6, &nexthop->gate, straddr,
2559 INET6_ADDRSTRLEN);
2560 break;
2561 }
2562 zlog_debug("%s: %s %s[%u] vrf %s(%u) with flags %s%s%s%s%s%s",
2563 func, (nexthop->rparent ? " NH" : "NH"), straddr,
2564 nexthop->ifindex, vrf ? vrf->name : "Unknown",
2565 nexthop->vrf_id,
2566 (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)
2567 ? "ACTIVE "
2568 : ""),
2569 (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)
2570 ? "FIB "
2571 : ""),
2572 (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)
2573 ? "RECURSIVE "
2574 : ""),
2575 (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)
2576 ? "ONLINK "
2577 : ""),
2578 (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_MATCHED)
2579 ? "MATCHED "
2580 : ""),
2581 (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE)
2582 ? "DUPLICATE "
2583 : ""));
2584 }
2585 zlog_debug("%s: dump complete", func);
2586 }
2587
2588 /* This is an exported helper to rtm_read() to dump the strange
2589 * RE entry found by rib_lookup_ipv4_route()
2590 */
2591
2592 void rib_lookup_and_dump(struct prefix_ipv4 *p, vrf_id_t vrf_id)
2593 {
2594 struct route_table *table;
2595 struct route_node *rn;
2596 struct route_entry *re;
2597 char prefix_buf[INET_ADDRSTRLEN];
2598
2599 /* Lookup table. */
2600 table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id);
2601 if (!table) {
2602 flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
2603 "%s:%u zebra_vrf_table() returned NULL", __func__,
2604 vrf_id);
2605 return;
2606 }
2607
2608 /* Scan the RIB table for exactly matching RE entry. */
2609 rn = route_node_lookup(table, (struct prefix *)p);
2610
2611 /* No route for this prefix. */
2612 if (!rn) {
2613 zlog_debug("%s:%u lookup failed for %s", __func__, vrf_id,
2614 prefix2str((struct prefix *)p, prefix_buf,
2615 sizeof(prefix_buf)));
2616 return;
2617 }
2618
2619 /* Unlock node. */
2620 route_unlock_node(rn);
2621
2622 /* let's go */
2623 RNODE_FOREACH_RE (rn, re) {
2624 zlog_debug("%s:%u rn %p, re %p: %s, %s",
2625 __func__, vrf_id,
2626 (void *)rn, (void *)re,
2627 (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)
2628 ? "removed"
2629 : "NOT removed"),
2630 (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)
2631 ? "selected"
2632 : "NOT selected"));
2633 route_entry_dump(p, NULL, re);
2634 }
2635 }
2636
2637 /* Check if requested address assignment will fail due to another
2638 * route being installed by zebra in FIB already. Take necessary
2639 * actions, if needed: remove such a route from FIB and deSELECT
2640 * corresponding RE entry. Then put affected RN into RIBQ head.
2641 */
2642 void rib_lookup_and_pushup(struct prefix_ipv4 *p, vrf_id_t vrf_id)
2643 {
2644 struct route_table *table;
2645 struct route_node *rn;
2646 rib_dest_t *dest;
2647
2648 if (NULL == (table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id))) {
2649 flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
2650 "%s:%u zebra_vrf_table() returned NULL", __func__,
2651 vrf_id);
2652 return;
2653 }
2654
2655 /* No matches would be the simplest case. */
2656 if (NULL == (rn = route_node_lookup(table, (struct prefix *)p)))
2657 return;
2658
2659 /* Unlock node. */
2660 route_unlock_node(rn);
2661
2662 dest = rib_dest_from_rnode(rn);
2663 /* Check all RE entries. In case any changes have to be done, requeue
2664 * the RN into RIBQ head. If the routing message about the new connected
2665 * route (generated by the IP address we are going to assign very soon)
2666 * comes before the RIBQ is processed, the new RE entry will join
2667 * RIBQ record already on head. This is necessary for proper
2668 * revalidation
2669 * of the rest of the RE.
2670 */
2671 if (dest->selected_fib) {
2672 if (IS_ZEBRA_DEBUG_RIB) {
2673 char buf[PREFIX_STRLEN];
2674
2675 zlog_debug("%u:%s: freeing way for connected prefix",
2676 dest->selected_fib->vrf_id,
2677 prefix2str(&rn->p, buf, sizeof(buf)));
2678 route_entry_dump(&rn->p, NULL, dest->selected_fib);
2679 }
2680 rib_uninstall(rn, dest->selected_fib);
2681 rib_queue_add(rn);
2682 }
2683 }
2684
2685 int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
2686 struct prefix_ipv6 *src_p, struct route_entry *re)
2687 {
2688 struct route_table *table;
2689 struct route_node *rn;
2690 struct route_entry *same = NULL;
2691 int ret = 0;
2692
2693 if (!re)
2694 return 0;
2695
2696 assert(!src_p || !src_p->prefixlen || afi == AFI_IP6);
2697
2698 /* Lookup table. */
2699 table = zebra_vrf_table_with_table_id(afi, safi, re->vrf_id, re->table);
2700 if (!table) {
2701 XFREE(MTYPE_RE, re);
2702 return 0;
2703 }
2704
2705 /* Make it sure prefixlen is applied to the prefix. */
2706 apply_mask(p);
2707 if (src_p)
2708 apply_mask_ipv6(src_p);
2709
2710 /* Set default distance by route type. */
2711 if (re->distance == 0) {
2712 re->distance = route_distance(re->type);
2713
2714 /* iBGP distance is 200. */
2715 if (re->type == ZEBRA_ROUTE_BGP
2716 && CHECK_FLAG(re->flags, ZEBRA_FLAG_IBGP))
2717 re->distance = 200;
2718 }
2719
2720 /* Lookup route node.*/
2721 rn = srcdest_rnode_get(table, p, src_p);
2722
2723 /*
2724 * If same type of route are installed, treat it as a implicit
2725 * withdraw.
2726 * If the user has specified the No route replace semantics
2727 * for the install don't do a route replace.
2728 */
2729 RNODE_FOREACH_RE (rn, same) {
2730 if (CHECK_FLAG(same->status, ROUTE_ENTRY_REMOVED))
2731 continue;
2732
2733 if (same->type != re->type)
2734 continue;
2735 if (same->instance != re->instance)
2736 continue;
2737 if (same->type == ZEBRA_ROUTE_KERNEL
2738 && same->metric != re->metric)
2739 continue;
2740
2741 if (CHECK_FLAG(re->flags, ZEBRA_FLAG_RR_USE_DISTANCE) &&
2742 same->distance != re->distance)
2743 continue;
2744
2745 /*
2746 * We should allow duplicate connected routes
2747 * because of IPv6 link-local routes and unnumbered
2748 * interfaces on Linux.
2749 */
2750 if (same->type != ZEBRA_ROUTE_CONNECT)
2751 break;
2752 }
2753
2754 /* If this route is kernel/connected route, notify the dataplane. */
2755 if (RIB_SYSTEM_ROUTE(re)) {
2756 /* Notify dataplane */
2757 dplane_sys_route_add(rn, re);
2758 }
2759
2760 /* Link new re to node.*/
2761 if (IS_ZEBRA_DEBUG_RIB) {
2762 rnode_debug(rn, re->vrf_id,
2763 "Inserting route rn %p, re %p (%s) existing %p",
2764 rn, re, zebra_route_string(re->type), same);
2765
2766 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2767 route_entry_dump(p, src_p, re);
2768 }
2769
2770 SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
2771 rib_addnode(rn, re, 1);
2772 ret = 1;
2773
2774 /* Free implicit route.*/
2775 if (same) {
2776 rib_delnode(rn, same);
2777 ret = -1;
2778 }
2779
2780 route_unlock_node(rn);
2781 return ret;
2782 }
2783
2784 void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
2785 unsigned short instance, int flags, struct prefix *p,
2786 struct prefix_ipv6 *src_p, const struct nexthop *nh,
2787 uint32_t table_id, uint32_t metric, uint8_t distance,
2788 bool fromkernel)
2789 {
2790 struct route_table *table;
2791 struct route_node *rn;
2792 struct route_entry *re;
2793 struct route_entry *fib = NULL;
2794 struct route_entry *same = NULL;
2795 struct nexthop *rtnh;
2796 char buf2[INET6_ADDRSTRLEN];
2797 rib_dest_t *dest;
2798
2799 assert(!src_p || !src_p->prefixlen || afi == AFI_IP6);
2800
2801 /* Lookup table. */
2802 table = zebra_vrf_table_with_table_id(afi, safi, vrf_id, table_id);
2803 if (!table)
2804 return;
2805
2806 /* Apply mask. */
2807 apply_mask(p);
2808 if (src_p)
2809 apply_mask_ipv6(src_p);
2810
2811 /* Lookup route node. */
2812 rn = srcdest_rnode_lookup(table, p, src_p);
2813 if (!rn) {
2814 char dst_buf[PREFIX_STRLEN], src_buf[PREFIX_STRLEN];
2815
2816 prefix2str(p, dst_buf, sizeof(dst_buf));
2817 if (src_p && src_p->prefixlen)
2818 prefix2str(src_p, src_buf, sizeof(src_buf));
2819 else
2820 src_buf[0] = '\0';
2821
2822 if (IS_ZEBRA_DEBUG_RIB)
2823 zlog_debug("%u:%s%s%s doesn't exist in rib", vrf_id,
2824 dst_buf,
2825 (src_buf[0] != '\0') ? " from " : "",
2826 src_buf);
2827 return;
2828 }
2829
2830 dest = rib_dest_from_rnode(rn);
2831 fib = dest->selected_fib;
2832
2833 /* Lookup same type route. */
2834 RNODE_FOREACH_RE (rn, re) {
2835 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
2836 continue;
2837
2838 if (re->type != type)
2839 continue;
2840 if (re->instance != instance)
2841 continue;
2842 if (CHECK_FLAG(re->flags, ZEBRA_FLAG_RR_USE_DISTANCE) &&
2843 distance != re->distance)
2844 continue;
2845
2846 if (re->type == ZEBRA_ROUTE_KERNEL && re->metric != metric)
2847 continue;
2848 if (re->type == ZEBRA_ROUTE_CONNECT && (rtnh = re->ng.nexthop)
2849 && rtnh->type == NEXTHOP_TYPE_IFINDEX && nh) {
2850 if (rtnh->ifindex != nh->ifindex)
2851 continue;
2852 same = re;
2853 break;
2854 }
2855 /* Make sure that the route found has the same gateway. */
2856 else {
2857 if (nh == NULL) {
2858 same = re;
2859 break;
2860 }
2861 for (ALL_NEXTHOPS(re->ng, rtnh))
2862 if (nexthop_same_no_recurse(rtnh, nh)) {
2863 same = re;
2864 break;
2865 }
2866 if (same)
2867 break;
2868 }
2869 }
2870 /* If same type of route can't be found and this message is from
2871 kernel. */
2872 if (!same) {
2873 /*
2874 * In the past(HA!) we could get here because
2875 * we were receiving a route delete from the
2876 * kernel and we're not marking the proto
2877 * as coming from it's appropriate originator.
2878 * Now that we are properly noticing the fact
2879 * that the kernel has deleted our route we
2880 * are not going to get called in this path
2881 * I am going to leave this here because
2882 * this might still work this way on non-linux
2883 * platforms as well as some weird state I have
2884 * not properly thought of yet.
2885 * If we can show that this code path is
2886 * dead then we can remove it.
2887 */
2888 if (fib && CHECK_FLAG(flags, ZEBRA_FLAG_SELFROUTE)) {
2889 if (IS_ZEBRA_DEBUG_RIB) {
2890 rnode_debug(rn, vrf_id,
2891 "rn %p, re %p (%s) was deleted from kernel, adding",
2892 rn, fib,
2893 zebra_route_string(fib->type));
2894 }
2895 if (allow_delete) {
2896 UNSET_FLAG(fib->status, ROUTE_ENTRY_INSTALLED);
2897 /* Unset flags. */
2898 for (rtnh = fib->ng.nexthop; rtnh;
2899 rtnh = rtnh->next)
2900 UNSET_FLAG(rtnh->flags,
2901 NEXTHOP_FLAG_FIB);
2902
2903 /*
2904 * This is a non FRR route
2905 * as such we should mark
2906 * it as deleted
2907 */
2908 dest->selected_fib = NULL;
2909 } else {
2910 /* This means someone else, other than Zebra,
2911 * has deleted
2912 * a Zebra router from the kernel. We will add
2913 * it back */
2914 rib_install_kernel(rn, fib, NULL);
2915 }
2916 } else {
2917 if (IS_ZEBRA_DEBUG_RIB) {
2918 if (nh)
2919 rnode_debug(
2920 rn, vrf_id,
2921 "via %s ifindex %d type %d "
2922 "doesn't exist in rib",
2923 inet_ntop(afi2family(afi),
2924 &nh->gate, buf2,
2925 sizeof(buf2)),
2926 nh->ifindex, type);
2927 else
2928 rnode_debug(
2929 rn, vrf_id,
2930 "type %d doesn't exist in rib",
2931 type);
2932 }
2933 route_unlock_node(rn);
2934 return;
2935 }
2936 }
2937
2938 if (same) {
2939 if (fromkernel && CHECK_FLAG(flags, ZEBRA_FLAG_SELFROUTE)
2940 && !allow_delete) {
2941 rib_install_kernel(rn, same, NULL);
2942 route_unlock_node(rn);
2943
2944 return;
2945 }
2946
2947 /* Special handling for IPv4 or IPv6 routes sourced from
2948 * EVPN - the nexthop (and associated MAC) need to be
2949 * uninstalled if no more refs.
2950 */
2951 if (CHECK_FLAG(flags, ZEBRA_FLAG_EVPN_ROUTE)) {
2952 struct nexthop *tmp_nh;
2953
2954 for (ALL_NEXTHOPS(re->ng, tmp_nh)) {
2955 struct ipaddr vtep_ip;
2956
2957 memset(&vtep_ip, 0, sizeof(struct ipaddr));
2958 if (afi == AFI_IP) {
2959 vtep_ip.ipa_type = IPADDR_V4;
2960 memcpy(&(vtep_ip.ipaddr_v4),
2961 &(tmp_nh->gate.ipv4),
2962 sizeof(struct in_addr));
2963 } else {
2964 vtep_ip.ipa_type = IPADDR_V6;
2965 memcpy(&(vtep_ip.ipaddr_v6),
2966 &(tmp_nh->gate.ipv6),
2967 sizeof(struct in6_addr));
2968 }
2969 zebra_vxlan_evpn_vrf_route_del(re->vrf_id,
2970 &vtep_ip, p);
2971 }
2972 }
2973
2974 /* Notify dplane if system route changes */
2975 if (RIB_SYSTEM_ROUTE(re))
2976 dplane_sys_route_del(rn, same);
2977
2978 rib_delnode(rn, same);
2979 }
2980
2981 route_unlock_node(rn);
2982 return;
2983 }
2984
2985
2986 int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
2987 unsigned short instance, int flags, struct prefix *p,
2988 struct prefix_ipv6 *src_p, const struct nexthop *nh,
2989 uint32_t table_id, uint32_t metric, uint32_t mtu, uint8_t distance,
2990 route_tag_t tag)
2991 {
2992 struct route_entry *re;
2993 struct nexthop *nexthop;
2994
2995 /* Allocate new route_entry structure. */
2996 re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
2997 re->type = type;
2998 re->instance = instance;
2999 re->distance = distance;
3000 re->flags = flags;
3001 re->metric = metric;
3002 re->mtu = mtu;
3003 re->table = table_id;
3004 re->vrf_id = vrf_id;
3005 re->nexthop_num = 0;
3006 re->uptime = time(NULL);
3007 re->tag = tag;
3008
3009 /* Add nexthop. */
3010 nexthop = nexthop_new();
3011 *nexthop = *nh;
3012 route_entry_nexthop_add(re, nexthop);
3013
3014 return rib_add_multipath(afi, safi, p, src_p, re);
3015 }
3016
3017 /* Schedule routes of a particular table (address-family) based on event. */
3018 void rib_update_table(struct route_table *table, rib_update_event_t event)
3019 {
3020 struct route_node *rn;
3021 struct route_entry *re, *next;
3022
3023 /* Walk all routes and queue for processing, if appropriate for
3024 * the trigger event.
3025 */
3026 for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) {
3027 /*
3028 * If we are looking at a route node and the node
3029 * has already been queued we don't
3030 * need to queue it up again
3031 */
3032 if (rn->info && CHECK_FLAG(rib_dest_from_rnode(rn)->flags,
3033 RIB_ROUTE_ANY_QUEUED))
3034 continue;
3035 switch (event) {
3036 case RIB_UPDATE_IF_CHANGE:
3037 /* Examine all routes that won't get processed by the
3038 * protocol or
3039 * triggered by nexthop evaluation (NHT). This would be
3040 * system,
3041 * kernel and certain static routes. Note that NHT will
3042 * get
3043 * triggered upon an interface event as connected routes
3044 * always
3045 * get queued for processing.
3046 */
3047 RNODE_FOREACH_RE_SAFE (rn, re, next) {
3048 struct nexthop *nh;
3049
3050 if (re->type != ZEBRA_ROUTE_SYSTEM
3051 && re->type != ZEBRA_ROUTE_KERNEL
3052 && re->type != ZEBRA_ROUTE_CONNECT
3053 && re->type != ZEBRA_ROUTE_STATIC)
3054 continue;
3055
3056 if (re->type != ZEBRA_ROUTE_STATIC) {
3057 SET_FLAG(re->status,
3058 ROUTE_ENTRY_CHANGED);
3059 rib_queue_add(rn);
3060 continue;
3061 }
3062
3063 for (nh = re->ng.nexthop; nh; nh = nh->next)
3064 if (!(nh->type == NEXTHOP_TYPE_IPV4
3065 || nh->type == NEXTHOP_TYPE_IPV6))
3066 break;
3067
3068 /* If we only have nexthops to a
3069 * gateway, NHT will
3070 * take care.
3071 */
3072 if (nh) {
3073 SET_FLAG(re->status,
3074 ROUTE_ENTRY_CHANGED);
3075 rib_queue_add(rn);
3076 }
3077 }
3078 break;
3079
3080 case RIB_UPDATE_RMAP_CHANGE:
3081 case RIB_UPDATE_OTHER:
3082 /* Right now, examine all routes. Can restrict to a
3083 * protocol in
3084 * some cases (TODO).
3085 */
3086 if (rnode_to_ribs(rn)) {
3087 RNODE_FOREACH_RE_SAFE (rn, re, next)
3088 SET_FLAG(re->status,
3089 ROUTE_ENTRY_CHANGED);
3090 rib_queue_add(rn);
3091 }
3092 break;
3093
3094 default:
3095 break;
3096 }
3097 }
3098 }
3099
3100 /* RIB update function. */
3101 void rib_update(vrf_id_t vrf_id, rib_update_event_t event)
3102 {
3103 struct route_table *table;
3104
3105 /* Process routes of interested address-families. */
3106 table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id);
3107 if (table) {
3108 if (IS_ZEBRA_DEBUG_EVENT)
3109 zlog_debug("%s : AFI_IP event %d", __func__, event);
3110 rib_update_table(table, event);
3111 }
3112
3113 table = zebra_vrf_table(AFI_IP6, SAFI_UNICAST, vrf_id);
3114 if (table) {
3115 if (IS_ZEBRA_DEBUG_EVENT)
3116 zlog_debug("%s : AFI_IP6 event %d", __func__, event);
3117 rib_update_table(table, event);
3118 }
3119 }
3120
3121 /* Delete self installed routes after zebra is relaunched. */
3122 void rib_sweep_table(struct route_table *table)
3123 {
3124 struct route_node *rn;
3125 struct route_entry *re;
3126 struct route_entry *next;
3127 struct nexthop *nexthop;
3128
3129 if (!table)
3130 return;
3131
3132 for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) {
3133 RNODE_FOREACH_RE_SAFE (rn, re, next) {
3134 if (IS_ZEBRA_DEBUG_RIB)
3135 route_entry_dump(&rn->p, NULL, re);
3136
3137 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
3138 continue;
3139
3140 if (!CHECK_FLAG(re->flags, ZEBRA_FLAG_SELFROUTE))
3141 continue;
3142
3143 /*
3144 * So we are starting up and have received
3145 * routes from the kernel that we have installed
3146 * from a previous run of zebra but not cleaned
3147 * up ( say a kill -9 )
3148 * But since we haven't actually installed
3149 * them yet( we received them from the kernel )
3150 * we don't think they are active.
3151 * So let's pretend they are active to actually
3152 * remove them.
3153 * In all honesty I'm not sure if we should
3154 * mark them as active when we receive them
3155 * This is startup only so probably ok.
3156 *
3157 * If we ever decide to move rib_sweep_table
3158 * to a different spot (ie startup )
3159 * this decision needs to be revisited
3160 */
3161 SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
3162 for (ALL_NEXTHOPS(re->ng, nexthop))
3163 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
3164
3165 rib_uninstall_kernel(rn, re);
3166 rib_delnode(rn, re);
3167 }
3168 }
3169 }
3170
3171 /* Sweep all RIB tables. */
3172 void rib_sweep_route(void)
3173 {
3174 struct vrf *vrf;
3175 struct zebra_vrf *zvrf;
3176
3177 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
3178 if ((zvrf = vrf->info) == NULL)
3179 continue;
3180
3181 rib_sweep_table(zvrf->table[AFI_IP][SAFI_UNICAST]);
3182 rib_sweep_table(zvrf->table[AFI_IP6][SAFI_UNICAST]);
3183 }
3184
3185 zebra_router_sweep_route();
3186 }
3187
3188 /* Remove specific by protocol routes from 'table'. */
3189 unsigned long rib_score_proto_table(uint8_t proto, unsigned short instance,
3190 struct route_table *table)
3191 {
3192 struct route_node *rn;
3193 struct route_entry *re;
3194 struct route_entry *next;
3195 unsigned long n = 0;
3196
3197 if (table)
3198 for (rn = route_top(table); rn; rn = srcdest_route_next(rn))
3199 RNODE_FOREACH_RE_SAFE (rn, re, next) {
3200 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
3201 continue;
3202 if (re->type == proto
3203 && re->instance == instance) {
3204 rib_delnode(rn, re);
3205 n++;
3206 }
3207 }
3208 return n;
3209 }
3210
3211 /* Remove specific by protocol routes. */
3212 unsigned long rib_score_proto(uint8_t proto, unsigned short instance)
3213 {
3214 struct vrf *vrf;
3215 struct zebra_vrf *zvrf;
3216 unsigned long cnt = 0;
3217
3218 RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id)
3219 if ((zvrf = vrf->info) != NULL)
3220 cnt += rib_score_proto_table(
3221 proto, instance,
3222 zvrf->table[AFI_IP][SAFI_UNICAST])
3223 + rib_score_proto_table(
3224 proto, instance,
3225 zvrf->table[AFI_IP6][SAFI_UNICAST]);
3226
3227 cnt += zebra_router_score_proto(proto, instance);
3228
3229 return cnt;
3230 }
3231
3232 /* Close RIB and clean up kernel routes. */
3233 void rib_close_table(struct route_table *table)
3234 {
3235 struct route_node *rn;
3236 rib_table_info_t *info;
3237 rib_dest_t *dest;
3238
3239 if (!table)
3240 return;
3241
3242 info = route_table_get_info(table);
3243
3244 for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) {
3245 dest = rib_dest_from_rnode(rn);
3246
3247 if (dest && dest->selected_fib) {
3248 if (info->safi == SAFI_UNICAST)
3249 hook_call(rib_update, rn, NULL);
3250
3251 rib_uninstall_kernel(rn, dest->selected_fib);
3252 dest->selected_fib = NULL;
3253 }
3254 }
3255 }
3256
3257 /*
3258 * Handler for async dataplane results after a pseudowire installation
3259 */
3260 static int handle_pw_result(struct zebra_dplane_ctx *ctx)
3261 {
3262 struct zebra_pw *pw;
3263 struct zebra_vrf *vrf;
3264
3265 /* The pseudowire code assumes success - we act on an error
3266 * result for installation attempts here.
3267 */
3268 if (dplane_ctx_get_op(ctx) != DPLANE_OP_PW_INSTALL)
3269 goto done;
3270
3271 if (dplane_ctx_get_status(ctx) != ZEBRA_DPLANE_REQUEST_SUCCESS) {
3272 vrf = zebra_vrf_lookup_by_id(dplane_ctx_get_vrf(ctx));
3273 pw = zebra_pw_find(vrf, dplane_ctx_get_pw_ifname(ctx));
3274 if (pw)
3275 zebra_pw_install_failure(pw);
3276 }
3277
3278 done:
3279
3280 return 0;
3281 }
3282
3283
3284 /*
3285 * Handle results from the dataplane system. Dequeue update context
3286 * structs, dispatch to appropriate internal handlers.
3287 */
3288 static int rib_process_dplane_results(struct thread *thread)
3289 {
3290 struct zebra_dplane_ctx *ctx;
3291 struct dplane_ctx_q ctxlist;
3292
3293 /* Dequeue a list of completed updates with one lock/unlock cycle */
3294
3295 do {
3296 TAILQ_INIT(&ctxlist);
3297
3298 /* Take lock controlling queue of results */
3299 pthread_mutex_lock(&dplane_mutex);
3300 {
3301 /* Dequeue list of context structs */
3302 dplane_ctx_list_append(&ctxlist, &rib_dplane_q);
3303 }
3304 pthread_mutex_unlock(&dplane_mutex);
3305
3306 /* Dequeue context block */
3307 ctx = dplane_ctx_dequeue(&ctxlist);
3308
3309 /* If we've emptied the results queue, we're done */
3310 if (ctx == NULL)
3311 break;
3312
3313 while (ctx) {
3314 switch (dplane_ctx_get_op(ctx)) {
3315 case DPLANE_OP_ROUTE_INSTALL:
3316 case DPLANE_OP_ROUTE_UPDATE:
3317 case DPLANE_OP_ROUTE_DELETE:
3318 rib_process_result(ctx);
3319 break;
3320
3321 case DPLANE_OP_LSP_INSTALL:
3322 case DPLANE_OP_LSP_UPDATE:
3323 case DPLANE_OP_LSP_DELETE:
3324 zebra_mpls_lsp_dplane_result(ctx);
3325 break;
3326
3327 case DPLANE_OP_PW_INSTALL:
3328 case DPLANE_OP_PW_UNINSTALL:
3329 handle_pw_result(ctx);
3330 break;
3331
3332 case DPLANE_OP_SYS_ROUTE_ADD:
3333 case DPLANE_OP_SYS_ROUTE_DELETE:
3334 /* No further processing in zebra for these. */
3335 dplane_ctx_fini(&ctx);
3336 break;
3337
3338 default:
3339 /* Don't expect this: just return the struct? */
3340 dplane_ctx_fini(&ctx);
3341 break;
3342 } /* Dispatch by op code */
3343
3344 ctx = dplane_ctx_dequeue(&ctxlist);
3345 }
3346
3347 } while (1);
3348
3349 /* Check for nexthop tracking processing after finishing with results */
3350 do_nht_processing();
3351
3352 return 0;
3353 }
3354
3355 /*
3356 * Results are returned from the dataplane subsystem, in the context of
3357 * the dataplane pthread. We enqueue the results here for processing by
3358 * the main thread later.
3359 */
3360 static int rib_dplane_results(struct dplane_ctx_q *ctxlist)
3361 {
3362 /* Take lock controlling queue of results */
3363 pthread_mutex_lock(&dplane_mutex);
3364 {
3365 /* Enqueue context blocks */
3366 dplane_ctx_list_append(&rib_dplane_q, ctxlist);
3367 }
3368 pthread_mutex_unlock(&dplane_mutex);
3369
3370 /* Ensure event is signalled to zebra main pthread */
3371 thread_add_event(zrouter.master, rib_process_dplane_results, NULL, 0,
3372 &t_dplane);
3373
3374 return 0;
3375 }
3376
3377 /*
3378 * Ensure there are no empty slots in the route_info array.
3379 * Every route type in zebra should be present there.
3380 */
3381 static void check_route_info(void)
3382 {
3383 int len = array_size(route_info);
3384
3385 /*
3386 * ZEBRA_ROUTE_SYSTEM is special cased since
3387 * its key is 0 anyway.
3388 *
3389 * ZEBRA_ROUTE_ALL is also ignored.
3390 */
3391 for (int i = 0; i < len; i++) {
3392 if (i == ZEBRA_ROUTE_SYSTEM || i == ZEBRA_ROUTE_ALL)
3393 continue;
3394 assert(route_info[i].key);
3395 assert(route_info[i].meta_q_map < MQ_SIZE);
3396 }
3397 }
3398
3399 /* Routing information base initialize. */
3400 void rib_init(void)
3401 {
3402 check_route_info();
3403
3404 rib_queue_init();
3405
3406 /* Init dataplane, and register for results */
3407 pthread_mutex_init(&dplane_mutex, NULL);
3408 TAILQ_INIT(&rib_dplane_q);
3409 zebra_dplane_init(rib_dplane_results);
3410 }
3411
3412 /*
3413 * vrf_id_get_next
3414 *
3415 * Get the first vrf id that is greater than the given vrf id if any.
3416 *
3417 * Returns TRUE if a vrf id was found, FALSE otherwise.
3418 */
3419 static inline int vrf_id_get_next(vrf_id_t vrf_id, vrf_id_t *next_id_p)
3420 {
3421 struct vrf *vrf;
3422
3423 vrf = vrf_lookup_by_id(vrf_id);
3424 if (vrf) {
3425 vrf = RB_NEXT(vrf_id_head, vrf);
3426 if (vrf) {
3427 *next_id_p = vrf->vrf_id;
3428 return 1;
3429 }
3430 }
3431
3432 return 0;
3433 }
3434
3435 /*
3436 * rib_tables_iter_next
3437 *
3438 * Returns the next table in the iteration.
3439 */
3440 struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter)
3441 {
3442 struct route_table *table;
3443
3444 /*
3445 * Array that helps us go over all AFI/SAFI combinations via one
3446 * index.
3447 */
3448 static struct {
3449 afi_t afi;
3450 safi_t safi;
3451 } afi_safis[] = {
3452 {AFI_IP, SAFI_UNICAST}, {AFI_IP, SAFI_MULTICAST},
3453 {AFI_IP, SAFI_LABELED_UNICAST}, {AFI_IP6, SAFI_UNICAST},
3454 {AFI_IP6, SAFI_MULTICAST}, {AFI_IP6, SAFI_LABELED_UNICAST},
3455 };
3456
3457 table = NULL;
3458
3459 switch (iter->state) {
3460
3461 case RIB_TABLES_ITER_S_INIT:
3462 iter->vrf_id = VRF_DEFAULT;
3463 iter->afi_safi_ix = -1;
3464
3465 /* Fall through */
3466
3467 case RIB_TABLES_ITER_S_ITERATING:
3468 iter->afi_safi_ix++;
3469 while (1) {
3470
3471 while (iter->afi_safi_ix
3472 < (int)array_size(afi_safis)) {
3473 table = zebra_vrf_table(
3474 afi_safis[iter->afi_safi_ix].afi,
3475 afi_safis[iter->afi_safi_ix].safi,
3476 iter->vrf_id);
3477 if (table)
3478 break;
3479
3480 iter->afi_safi_ix++;
3481 }
3482
3483 /*
3484 * Found another table in this vrf.
3485 */
3486 if (table)
3487 break;
3488
3489 /*
3490 * Done with all tables in the current vrf, go to the
3491 * next
3492 * one.
3493 */
3494 if (!vrf_id_get_next(iter->vrf_id, &iter->vrf_id))
3495 break;
3496
3497 iter->afi_safi_ix = 0;
3498 }
3499
3500 break;
3501
3502 case RIB_TABLES_ITER_S_DONE:
3503 return NULL;
3504 }
3505
3506 if (table)
3507 iter->state = RIB_TABLES_ITER_S_ITERATING;
3508 else
3509 iter->state = RIB_TABLES_ITER_S_DONE;
3510
3511 return table;
3512 }