]> git.proxmox.com Git - mirror_frr.git/commitdiff
pimd: Pim Nexthop Tracking support with ECMP
authorChirag Shah <chirag@cumulusnetworks.com>
Wed, 5 Apr 2017 20:14:12 +0000 (13:14 -0700)
committerChirag Shah <chirag@cumulusnetworks.com>
Tue, 25 Apr 2017 19:51:18 +0000 (12:51 -0700)
In this patch, PIM nexthop tracking uses locally populated nexthop cached list
to determine ECMP based nexthop (w/ ECMP knob enabled), otherwise picks
the first nexthop as RPF.
Introduced '[no] ip pim ecmp' command to enable/disable PIM ECMP knob.
By default, PIM ECMP is disabled.
Introduced '[no] ip pim ecmp rebalance' command to allow existing mcache
entries to switch to the new path chosen by the hash.
Introduced a show command to display PIM registered addresses and their respective nexthops.
Introduced a show command to find the nexthop and outgoing interface for (S,G) or (RP,G).
Re-Register an address with nexthop when Interface UP event received,
to ensure the PIM nexthop cache is updated (being PIM enabled).
During PIM neighbor UP, traverse the nexthops of all RPs and upstreams and determine
whether any nexthop's IPv4 address changes/resolves due to the neighbor UP event.

Testing Done: Run various LHR, RP and FHR related cases to resolve RPF using
nexthop cache with ECMP knob disabled, performed interface/PIM neighbor flap events.
Executed pim-smoke with knob disabled.

Signed-off-by: Chirag Shah <chirag@cumulusnetworks.com>
(cherry picked from commit cba444817883b8b3b22a7ed9958dc9ed77f76230)

15 files changed:
pimd/pim_cmd.c
pimd/pim_iface.c
pimd/pim_mroute.c
pimd/pim_neighbor.c
pimd/pim_nht.c
pimd/pim_nht.h
pimd/pim_oil.c
pimd/pim_register.c
pimd/pim_rp.c
pimd/pim_rp.h
pimd/pim_rpf.c
pimd/pim_upstream.c
pimd/pim_zebra.c
pimd/pimd.c
pimd/pimd.h

index de2b7cbba6db84befdf469037f007d29c944b6cb..acff785ce9468e26e5e30889f8375f7767a82ce6 100644 (file)
@@ -26,6 +26,8 @@
 #include "prefix.h"
 #include "zclient.h"
 #include "plist.h"
+#include "hash.h"
+#include "nexthop.h"
 
 #include "pimd.h"
 #include "pim_mroute.h"
@@ -55,6 +57,7 @@
 #include "pim_zlookup.h"
 #include "pim_msdp.h"
 #include "pim_ssm.h"
+#include "pim_nht.h"
 
 static struct cmd_node pim_global_node = {
   PIM_NODE,
@@ -2065,6 +2068,50 @@ static void pim_show_rpf(struct vty *vty, u_char uj)
   }
 }
 
+/*
+ * hash_walk () callback: print one output row per nexthop stored in a
+ * pim_nexthop_cache entry.
+ *
+ * backet - hash bucket whose data pointer is the pim_nexthop_cache entry
+ * arg    - the struct vty to print to
+ *
+ * Each row shows the registered (RPF) address, the name of the interface
+ * the nexthop's ifindex resolves to ("NULL" when the lookup fails) and the
+ * nexthop gateway address.
+ *
+ * Always returns HASHWALK_CONTINUE so the walk visits every bucket; a walk
+ * callback reports walk control, not a CLI status code.
+ */
+static int
+pim_print_pnc_cache_walkcb (struct hash_backet *backet, void *arg)
+{
+  struct pim_nexthop_cache *pnc = backet->data;
+  struct vty *vty = arg;
+  struct nexthop *nh_node = NULL;
+  struct interface *ifp = NULL;
+
+  if (!pnc)
+    return HASHWALK_CONTINUE;
+
+  for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next)
+    {
+      ifp = if_lookup_by_index (nh_node->ifindex, VRF_DEFAULT);
+
+      /* inet_ntoa () is used twice, so each address gets its own vty_out (). */
+      vty_out (vty, "%-15s ", inet_ntoa (pnc->rpf.rpf_addr.u.prefix4));
+      vty_out (vty, "%-14s ", ifp ? ifp->name : "NULL");
+      vty_out (vty, "%s ", inet_ntoa (nh_node->gate.ipv4));
+      vty_out (vty, "%s", VTY_NEWLINE);
+    }
+  return HASHWALK_CONTINUE;
+}
+
+/*
+ * Print the PIM nexthop cache to vty: the number of registered addresses,
+ * a table header, then one row per cached nexthop (rows are produced by
+ * pim_print_pnc_cache_walkcb via hash_walk ()).
+ *
+ * Prints "no nexthop cache" and returns when either the global pimg or its
+ * rpf_hash is missing.
+ */
+static void
+pim_show_nexthop (struct vty *vty)
+{
+  /* Guard pimg as well as its hash: both are dereferenced below. */
+  if (!pimg || !pimg->rpf_hash)
+    {
+      vty_out (vty, "no nexthop cache %s", VTY_NEWLINE);
+      return;
+    }
+
+  vty_out (vty, "Number of registered addresses: %lu %s",
+           pimg->rpf_hash->count, VTY_NEWLINE);
+  vty_out (vty, "Address         Interface      Nexthop%s", VTY_NEWLINE);
+  vty_out (vty, "-------------------------------------------%s", VTY_NEWLINE);
+
+  hash_walk (pimg->rpf_hash, pim_print_pnc_cache_walkcb, vty);
+}
+
 static void igmp_show_groups(struct vty *vty, u_char uj)
 {
   struct listnode  *ifnode;
@@ -2799,6 +2846,99 @@ DEFUN (show_ip_pim_rpf,
   return CMD_SUCCESS;
 }
 
+DEFUN (show_ip_pim_nexthop,
+       show_ip_pim_nexthop_cmd,
+       "show ip pim nexthop",
+       SHOW_STR
+       IP_STR
+       PIM_STR
+       "PIM cached nexthop rpf information\n")
+{
+  pim_show_nexthop (vty);       /* prints the nexthop cache table, or "no nexthop cache", to this vty */
+
+  return CMD_SUCCESS;           /* always succeeds; the helper reports nothing back */
+}
+
+DEFUN (show_ip_pim_nexthop_lookup,
+       show_ip_pim_nexthop_lookup_cmd,
+       "show ip pim nexthop-lookup A.B.C.D A.B.C.D",
+       SHOW_STR
+       IP_STR
+       PIM_STR
+       "PIM cached nexthop rpf lookup\n"
+       "Source/RP address\n"
+       "Multicast Group address\n")
+{
+  /*
+   * Resolve and print the RPF nexthop and interface for a
+   * (source-or-RP, group) pair.
+   *
+   * The first argument must parse as an IPv4 address that is not a
+   * multicast group; the second must parse as an IPv4 multicast group.
+   * Either failure prints a message and returns CMD_WARNING.
+   *
+   * The nexthop comes from the nexthop cache when
+   * pim_find_or_track_nexthop () reports a valid entry (returns 1), and
+   * from pim_ecmp_nexthop_lookup () otherwise.
+   *
+   * NOTE(review): argv[0]/argv[1] are cast straight to strings; confirm
+   * this matches the argv layout handed to DEFUN bodies in this tree.
+   */
+  struct pim_nexthop_cache pnc;
+  struct prefix nht_p;
+  int result = 0;
+  struct in_addr src_addr, grp_addr;
+  struct in_addr vif_source;
+  const char *addr_str, *addr_str1;
+  struct prefix grp;
+  struct pim_nexthop nexthop;
+  char nexthop_addr_str[PREFIX_STRLEN];
+  char grp_str[PREFIX_STRLEN];
+
+  addr_str = (const char *)argv[0];
+  result = inet_pton (AF_INET, addr_str, &src_addr);
+  if (result <= 0)
+    {
+      vty_out (vty, "Bad unicast address %s: errno=%d: %s%s",
+               addr_str, errno, safe_strerror (errno), VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  if (pim_is_group_224_4 (src_addr))
+    {
+      vty_out (vty, "Invalid argument. Expected Valid Source Address.%s", VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  addr_str1 = (const char *)argv[1];
+  result = inet_pton (AF_INET, addr_str1, &grp_addr);
+  if (result <= 0)
+    {
+      /* Report the group argument that failed to parse, not the source. */
+      vty_out (vty, "Bad group address %s: errno=%d: %s%s",
+               addr_str1, errno, safe_strerror (errno), VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  if (!pim_is_group_224_4 (grp_addr))
+    {
+      vty_out (vty, "Invalid argument. Expected Valid Multicast Group Address.%s", VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  /* vif_source receives the address to resolve; nothing is printed when
+     the helper returns 0. */
+  if (!pim_rp_set_upstream_addr (&vif_source, src_addr, grp_addr))
+    return CMD_SUCCESS;
+
+  memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+  nht_p.family = AF_INET;
+  nht_p.prefixlen = IPV4_MAX_BITLEN;
+  nht_p.u.prefix4 = vif_source;
+  grp.family = AF_INET;
+  grp.prefixlen = IPV4_MAX_BITLEN;
+  grp.u.prefix4 = grp_addr;
+  memset (&nexthop, 0, sizeof (nexthop));
+
+  if ((pim_find_or_track_nexthop (&nht_p, NULL, NULL, &pnc)) == 1)
+    {
+      //Compute PIM RPF using Cached nexthop
+      pim_ecmp_nexthop_search (&pnc, &nexthop, &nht_p, &grp, 0);
+    }
+  else
+    pim_ecmp_nexthop_lookup (&nexthop, vif_source, &nht_p, &grp, 0);
+
+  pim_addr_dump ("<grp?>", &grp, grp_str, sizeof (grp_str));
+  pim_addr_dump ("<nexthop?>", &nexthop.mrib_nexthop_addr,
+                 nexthop_addr_str, sizeof (nexthop_addr_str));
+  /* nexthop was zeroed above and stays that way when neither lookup
+     selects a path, so the interface pointer may still be NULL here. */
+  vty_out (vty, "Group %s --- Nexthop %s Interface %s %s", grp_str,
+           nexthop_addr_str,
+           nexthop.interface ? nexthop.interface->name : "NULL",
+           VTY_NEWLINE);
+
+  return CMD_SUCCESS;
+}
+
 static void show_multicast_interfaces(struct vty *vty)
 {
   struct listnode  *node;
@@ -2877,15 +3017,17 @@ DEFUN (show_ip_multicast,
          PIM_MAX_USABLE_VIFS,
          VTY_NEWLINE);
 
-  vty_out(vty, "%s", VTY_NEWLINE);
-  vty_out(vty, "Upstream Join Timer: %d secs%s",
-         qpim_t_periodic,
-         VTY_NEWLINE);
-  vty_out(vty, "Join/Prune Holdtime: %d secs%s",
-         PIM_JP_HOLDTIME,
-         VTY_NEWLINE);
+  vty_out (vty, "%s", VTY_NEWLINE);
+  vty_out (vty, "Upstream Join Timer: %d secs%s",
+           qpim_t_periodic, VTY_NEWLINE);
+  vty_out (vty, "Join/Prune Holdtime: %d secs%s",
+           PIM_JP_HOLDTIME, VTY_NEWLINE);
+  vty_out (vty, "PIM ECMP: %s%s",
+           qpim_ecmp_enable ? "Enable" : "Disable", VTY_NEWLINE);
+  vty_out (vty, "PIM ECMP Rebalance: %s%s",
+           qpim_ecmp_rebalance_enable ? "Enable" : "Disable", VTY_NEWLINE);
 
-  vty_out(vty, "%s", VTY_NEWLINE);
+  vty_out (vty, "%s", VTY_NEWLINE);
 
   show_rpf_refresh_stats(vty, now, NULL);
 
@@ -3861,6 +4003,58 @@ DEFUN (no_ip_ssmpingd,
   return CMD_SUCCESS;
 }
 
+DEFUN (ip_pim_ecmp,
+       ip_pim_ecmp_cmd,
+       "ip pim ecmp",
+       IP_STR
+       "pim multicast routing\n"
+       "Enable PIM ECMP \n")
+{
+  qpim_ecmp_enable = 1;         /* global knob; pim_ecmp_nexthop_search () hashes over the nexthops when it is set */
+
+  return CMD_SUCCESS;
+}
+
+DEFUN (no_ip_pim_ecmp,
+       no_ip_pim_ecmp_cmd,
+       "no ip pim ecmp",
+       NO_STR
+       IP_STR
+       "pim multicast routing\n"
+       "Disable PIM ECMP \n")
+{
+  qpim_ecmp_enable = 0;         /* with the knob clear, pim_ecmp_nexthop_search () skips hashing and takes the first usable nexthop */
+
+  return CMD_SUCCESS;
+}
+
+DEFUN (ip_pim_ecmp_rebalance,
+       ip_pim_ecmp_rebalance_cmd,
+       "ip pim ecmp rebalance",
+       IP_STR
+       "pim multicast routing\n"
+       "Enable PIM ECMP \n"
+       "Enable PIM ECMP Rebalance\n")
+{
+  qpim_ecmp_rebalance_enable = 1;       /* when set, pim_ecmp_nexthop_search () skips its "stay on the current path" check */
+
+  return CMD_SUCCESS;
+}
+
+DEFUN (no_ip_pim_ecmp_rebalance,
+       no_ip_pim_ecmp_rebalance_cmd,
+       "no ip pim ecmp rebalance",
+       NO_STR
+       IP_STR
+       "pim multicast routing\n"
+       "Disable PIM ECMP \n"
+       "Disable PIM ECMP Rebalance\n")
+{
+  qpim_ecmp_rebalance_enable = 0;       /* when clear, pim_ecmp_nexthop_search () may keep a still-valid current nexthop */
+
+  return CMD_SUCCESS;
+}
+
 static int
 pim_cmd_igmp_start (struct vty *vty, struct interface *ifp)
 {
@@ -6228,6 +6422,10 @@ void pim_cmd_init()
   install_element (CONFIG_NODE, &no_ip_ssmpingd_cmd); 
   install_element (CONFIG_NODE, &ip_msdp_peer_cmd);
   install_element (CONFIG_NODE, &no_ip_msdp_peer_cmd);
+  install_element (CONFIG_NODE, &ip_pim_ecmp_cmd);
+  install_element (CONFIG_NODE, &no_ip_pim_ecmp_cmd);
+  install_element (CONFIG_NODE, &ip_pim_ecmp_rebalance_cmd);
+  install_element (CONFIG_NODE, &no_ip_pim_ecmp_rebalance_cmd);
 
   install_element (INTERFACE_NODE, &interface_ip_igmp_cmd);
   install_element (INTERFACE_NODE, &interface_no_ip_igmp_cmd); 
@@ -6283,6 +6481,8 @@ void pim_cmd_init()
   install_element (VIEW_NODE, &show_ip_rib_cmd);
   install_element (VIEW_NODE, &show_ip_ssmpingd_cmd);
   install_element (VIEW_NODE, &show_debugging_pim_cmd);
+  install_element (VIEW_NODE, &show_ip_pim_nexthop_cmd);
+  install_element (VIEW_NODE, &show_ip_pim_nexthop_lookup_cmd);
 
   install_element (ENABLE_NODE, &clear_ip_interfaces_cmd);
   install_element (ENABLE_NODE, &clear_ip_igmp_interfaces_cmd);
index 086479643166c5b03bc7e145e0ac31da5d3117ef..bdad1c531cefdde888d9ff9eff9c0c354fd79d1f 100644 (file)
@@ -30,6 +30,7 @@
 #include "hash.h"
 
 #include "pimd.h"
+#include "pim_zebra.h"
 #include "pim_iface.h"
 #include "pim_igmp.h"
 #include "pim_mroute.h"
@@ -42,6 +43,7 @@
 #include "pim_time.h"
 #include "pim_ssmpingd.h"
 #include "pim_rp.h"
+#include "pim_nht.h"
 
 struct interface *pim_regiface = NULL;
 struct list *pim_ifchannel_list = NULL;
@@ -583,21 +585,40 @@ void pim_if_addr_add(struct connected *ifc)
     }
   } /* igmp */
 
-  if (PIM_IF_TEST_PIM(pim_ifp->options)) {
-
-    /* Interface has a valid primary address ? */
-    if (PIM_INADDR_ISNOT_ANY(pim_ifp->primary_address)) {
-
-      /* Interface has a valid socket ? */
-      if (pim_ifp->pim_sock_fd < 0) {
-       if (pim_sock_add(ifp)) {
-         zlog_warn("Failure creating PIM socket for interface %s",
-                   ifp->name);
-       }
-      }
+  if (PIM_IF_TEST_PIM(pim_ifp->options))
+    {
 
-    }
-  } /* pim */
+      if (PIM_INADDR_ISNOT_ANY (pim_ifp->primary_address))
+        {
+
+          /* Interface has a valid socket ? */
+          if (pim_ifp->pim_sock_fd < 0)
+            {
+              if (pim_sock_add (ifp))
+                {
+                  zlog_warn ("Failure creating PIM socket for interface %s",
+                             ifp->name);
+                }
+            }
+          struct pim_nexthop_cache *pnc = NULL;
+          struct pim_rpf rpf;
+          struct zclient *zclient = NULL;
+
+          zclient = pim_zebra_zclient_get ();
+          /* RP config might come prior to (local RP's interface) IF UP event.
+             In this case, pnc would not have pim enabled nexthops.
+             Once Interface is UP and pim info is available, reregister
+             with RNH address to receive update and add the interface as nexthop. */
+          memset (&rpf, 0, sizeof (struct pim_rpf));
+          rpf.rpf_addr.family = AF_INET;
+          rpf.rpf_addr.prefixlen = IPV4_MAX_BITLEN;
+          rpf.rpf_addr.u.prefix4 = ifc->address->u.prefix4;
+          pnc = pim_nexthop_cache_find (&rpf);
+          if (pnc)
+            pim_sendmsg_zebra_rnh (zclient, pnc,
+                                   ZEBRA_NEXTHOP_REGISTER);
+        }
+    } /* pim */
 
     /*
       PIM or IGMP is enabled on interface, and there is at least one
index 1df4c033bc37a80e54ac3c1ee04cb65183fd8c3b..ce2f98f318bcf6d58bcd9cb15586bd4c1f5a6ded 100644 (file)
@@ -178,6 +178,15 @@ pim_mroute_msg_nocache (int fd, struct interface *ifp, const struct igmpmsg *msg
 
   up->channel_oil->cc.pktcnt++;
   PIM_UPSTREAM_FLAG_SET_FHR(up->flags);
+  // resolve mfcc_parent prior to mroute_add in channel_add_oif
+  if (up->channel_oil->oil.mfcc_parent >= MAXVIFS)
+    {
+      int vif_index = 0;
+      vif_index =
+        pim_if_find_vifindex_by_ifindex (up->rpf.source_nexthop.
+                                         interface->ifindex);
+      up->channel_oil->oil.mfcc_parent = vif_index;
+    }
   pim_register_join (up);
 
   return 0;
@@ -882,9 +891,8 @@ int pim_mroute_del (struct channel_oil *c_oil, const char *name)
                  pim_channel_oil_dump (c_oil, buf, sizeof(buf)));
     }
 
-  /*reset incoming vifi and kernel installed flags*/
+  //Reset kernel installed flag
   c_oil->installed = 0;
-  c_oil->oil.mfcc_parent = MAXVIFS;
 
   return 0;
 }
index c1325df2605ec16f1cc1f9ad98a992dc6935b2f9..71d7ac414800d40d76ac7132a6d07301083f9135 100644 (file)
@@ -502,6 +502,12 @@ struct pim_neighbor *pim_neighbor_add(struct interface *ifp,
 
   listnode_add(pim_ifp->pim_neighbor_list, neigh);
 
+  if (PIM_DEBUG_PIM_TRACE_DETAIL)
+    {
+      char str[INET_ADDRSTRLEN];
+      pim_inet4_dump("<nht_nbr?>", source_addr, str, sizeof (str));
+      zlog_debug ("%s: neighbor %s added ", __PRETTY_FUNCTION__, str);
+    }
   /*
     RFC 4601: 4.3.2.  DR Election
 
@@ -532,6 +538,14 @@ struct pim_neighbor *pim_neighbor_add(struct interface *ifp,
 
   pim_upstream_find_new_rpf();
 
+  /* RNH can send nexthop update prior to PIM neibhor UP
+     in that case nexthop cache would not consider this neighbor
+     as RPF.
+     Upon PIM neighbor UP, iterate all RPs and update
+     nexthop cache with this neighbor.
+   */
+  pim_resolve_rp_nh ();
+
   pim_rp_setup ();
 
   pim_neighbor_rpf_update();
index fe96d01a06516dc507b1388ecc2752e2d193146d..23efd554ea0f2bbe294a0bf4d5b33be4c38f6409 100644 (file)
 #include "pim_join.h"
 #include "pim_jp_agg.h"
 #include "pim_zebra.h"
+#include "pim_zlookup.h"
 
 /**
  * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
  *   command to Zebra.
  */
-static void
+void
 pim_sendmsg_zebra_rnh (struct zclient *zclient, struct pim_nexthop_cache *pnc,
                        int command)
 {
@@ -87,9 +88,9 @@ pim_sendmsg_zebra_rnh (struct zclient *zclient, struct pim_nexthop_cache *pnc,
     {
       char buf[PREFIX2STR_BUFFER];
       prefix2str (p, buf, sizeof (buf));
-      zlog_debug ("%s: NHT Addr %s %sregistered with Zebra ret:%d ",
-                  __PRETTY_FUNCTION__, buf,
-                  (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", ret);
+      zlog_debug ("%s: NHT %sregistered addr %s with Zebra ret:%d ",
+                  __PRETTY_FUNCTION__,
+                  (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", buf, ret);
     }
 
   return;
@@ -149,7 +150,8 @@ pim_nexthop_cache_add (struct pim_rpf *rpf_addr)
 /* This API is used to Register an address with Zebra */
 int
 pim_find_or_track_nexthop (struct prefix *addr, struct pim_upstream *up,
-                           struct rp_info *rp)
+                           struct rp_info *rp,
+                           struct pim_nexthop_cache *out_pnc)
 {
   struct pim_nexthop_cache *pnc = NULL;
   struct pim_rpf rpf;
@@ -174,11 +176,14 @@ pim_find_or_track_nexthop (struct prefix *addr, struct pim_upstream *up,
         }
       pnc = pim_nexthop_cache_add (&rpf);
       if (pnc)
-        pim_sendmsg_zebra_rnh (zclient, pnc,
-                               ZEBRA_NEXTHOP_REGISTER);
+        pim_sendmsg_zebra_rnh (zclient, pnc, ZEBRA_NEXTHOP_REGISTER);
       else
         {
-          zlog_warn ("%s: pnc node allocation failed. ", __PRETTY_FUNCTION__);
+          char rpf_str[PREFIX_STRLEN];
+          pim_addr_dump ("<nht-pnc?>", addr, rpf_str, sizeof (rpf_str));
+          zlog_warn ("%s: pnc node allocation failed. addr %s ",
+                     __PRETTY_FUNCTION__, rpf_str);
+          return -1;
         }
     }
 
@@ -192,7 +197,7 @@ pim_find_or_track_nexthop (struct prefix *addr, struct pim_upstream *up,
               char rp_str[PREFIX_STRLEN];
               pim_addr_dump ("<rp?>", &rp->rp.rpf_addr, rp_str,
                              sizeof (rp_str));
-              zlog_debug ("%s: NHT add RP %s node to cached list",
+              zlog_debug ("%s: Add RP %s node to pnc cached list",
                           __PRETTY_FUNCTION__, rp_str);
             }
           listnode_add_sort (pnc->rp_list, rp);
@@ -209,15 +214,18 @@ pim_find_or_track_nexthop (struct prefix *addr, struct pim_upstream *up,
               char buf[PREFIX2STR_BUFFER];
               prefix2str (addr, buf, sizeof (buf));
               zlog_debug
-                ("%s: NHT add upstream %s node to cached list, rpf %s",
+                ("%s: Add upstream %s node to pnc cached list, rpf %s",
                  __PRETTY_FUNCTION__, up->sg_str, buf);
             }
           listnode_add_sort (pnc->upstream_list, up);
         }
     }
 
-  if (CHECK_FLAG (pnc->flags, PIM_NEXTHOP_VALID))
-    return 1;
+  if (pnc && CHECK_FLAG (pnc->flags, PIM_NEXTHOP_VALID))
+    {
+      memcpy (out_pnc, pnc, sizeof (struct pim_nexthop_cache));
+      return 1;
+    }
 
   return 0;
 }
@@ -249,8 +257,7 @@ pim_delete_tracked_nexthop (struct prefix *addr, struct pim_upstream *up,
 
       if (pnc->rp_list->count == 0 && pnc->upstream_list->count == 0)
         {
-          pim_sendmsg_zebra_rnh (zclient, pnc,
-                                 ZEBRA_NEXTHOP_UNREGISTER);
+          pim_sendmsg_zebra_rnh (zclient, pnc, ZEBRA_NEXTHOP_UNREGISTER);
 
           list_delete (pnc->rp_list);
           list_delete (pnc->upstream_list);
@@ -277,21 +284,16 @@ pim_update_rp_nh (struct pim_nexthop_cache *pnc)
       if (rp_info->rp.rpf_addr.u.prefix4.s_addr == INADDR_NONE)
         continue;
 
-      if (pim_nexthop_lookup (&rp_info->rp.source_nexthop,
-                              rp_info->rp.rpf_addr.u.prefix4, 1) != 0)
-        {
-          if (PIM_DEBUG_PIM_TRACE)
-            zlog_debug ("Unable to lookup nexthop for rp specified");
-          ret++;
-          continue;
-        }
+      //Compute PIM RPF using cached nexthop
+      pim_ecmp_nexthop_search (pnc, &rp_info->rp.source_nexthop,
+                               &rp_info->rp.rpf_addr, &rp_info->group, 1);
 
       if (PIM_DEBUG_TRACE)
         {
           char rp_str[PREFIX_STRLEN];
           pim_addr_dump ("<rp?>", &rp_info->rp.rpf_addr, rp_str,
                          sizeof (rp_str));
-          zlog_debug ("%s: NHT update nexthop for RP %s to interface %s ",
+          zlog_debug ("%s: NHT update, nexthop for RP %s is interface %s ",
                       __PRETTY_FUNCTION__, rp_str,
                       rp_info->rp.source_nexthop.interface->name);
         }
@@ -304,6 +306,48 @@ pim_update_rp_nh (struct pim_nexthop_cache *pnc)
 
 }
 
+/*
+ * Re-examine the cached nexthops of nht_p after an event, such as a PIM
+ * neighbor coming up, that may let an unresolved nexthop be resolved.
+ *
+ * Every cached nexthop whose IPv4 gateway is still 0.0.0.0 is given the
+ * source address of the PIM neighbor found on the nexthop's interface.
+ * Nexthops whose interface cannot be looked up, or that have no neighbor,
+ * are left untouched.
+ *
+ * Nothing is done unless pim_find_or_track_nexthop () reports a valid
+ * cache entry (returns 1).  pnc is filled by a memcpy () of that entry, so
+ * pnc.nexthop presumably aliases the list held by the cache and the update
+ * below lands in the cached nodes - confirm against pim_nexthop_cache.
+ */
+void
+pim_resolve_upstream_nh (struct prefix *nht_p)
+{
+  struct nexthop *nh_node = NULL;
+  struct pim_nexthop_cache pnc;
+  struct pim_neighbor *nbr = NULL;
+  struct interface *ifp = NULL;
+
+  memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+  if ((pim_find_or_track_nexthop (nht_p, NULL, NULL, &pnc)) != 1)
+    return;
+
+  for (nh_node = pnc.nexthop; nh_node; nh_node = nh_node->next)
+    {
+      /* Only nexthops without a gateway address need resolving. */
+      if (nh_node->gate.ipv4.s_addr != 0)
+        continue;
+
+      /* The ifindex may not map to a known interface; do not hand a NULL
+         interface to the neighbor lookup. */
+      ifp = if_lookup_by_index (nh_node->ifindex, VRF_DEFAULT);
+      if (!ifp)
+        continue;
+
+      nbr = pim_neighbor_find_if (ifp);
+      if (!nbr)
+        continue;
+
+      nh_node->gate.ipv4 = nbr->source_addr;
+      if (PIM_DEBUG_TRACE)
+        {
+          char str[PREFIX_STRLEN];
+          char str1[INET_ADDRSTRLEN];
+          pim_inet4_dump ("<nht_nbr?>", nbr->source_addr, str1,
+                          sizeof (str1));
+          pim_addr_dump ("<nht_addr?>", nht_p, str, sizeof (str));
+          zlog_debug
+            ("%s: addr %s new nexthop addr %s ifindex %d ",
+             __PRETTY_FUNCTION__, str, str1, nh_node->ifindex);
+        }
+    }
+}
+
 /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
 static int
 pim_update_upstream_nh (struct pim_nexthop_cache *pnc)
@@ -312,8 +356,8 @@ pim_update_upstream_nh (struct pim_nexthop_cache *pnc)
   struct listnode     *ifnode;
   struct listnode     *up_nextnode;
   struct listnode     *node;
-  struct pim_upstream *up;
-  struct interface    *ifp;
+  struct pim_upstream *up = NULL;
+  struct interface    *ifp = NULL;
   int                 vif_index = 0;
 
   for (ALL_LIST_ELEMENTS (pnc->upstream_list, up_node, up_nextnode, up))
@@ -326,6 +370,26 @@ pim_update_upstream_nh (struct pim_nexthop_cache *pnc)
       if (rpf_result == PIM_RPF_FAILURE)
         continue;
 
+      /* update kernel multicast forwarding cache (MFC) */
+      if (up->channel_oil)
+        {
+          vif_index =
+            pim_if_find_vifindex_by_ifindex (up->rpf.
+                                             source_nexthop.interface->
+                                             ifindex);
+          /* Pass Current selected NH vif index to mroute download */
+          if (vif_index)
+            pim_scan_individual_oil (up->channel_oil, vif_index);
+          else
+            {
+              if (PIM_DEBUG_ZEBRA)
+                zlog_debug
+                  ("%s: NHT upstream %s channel_oil IIF %s vif_index is not valid",
+                   __PRETTY_FUNCTION__, up->sg_str,
+                   up->rpf.source_nexthop.interface->name);
+            }
+        }
+
       if (rpf_result == PIM_RPF_CHANGED)
         {
           struct pim_neighbor *nbr;
@@ -377,7 +441,7 @@ pim_update_upstream_nh (struct pim_nexthop_cache *pnc)
              returning PIM_RPF_CHANGED ? */
           pim_upstream_update_join_desired (up);
 
-        } /* PIM_RPF_CHANGED */
+        }                       /* PIM_RPF_CHANGED */
 
       if (PIM_DEBUG_TRACE)
         {
@@ -386,26 +450,7 @@ pim_update_upstream_nh (struct pim_nexthop_cache *pnc)
                       old.source_nexthop.interface->name,
                       up->rpf.source_nexthop.interface->name);
         }
-      /* update kernel multicast forwarding cache (MFC) */
-      if (up->channel_oil)
-        {
-          vif_index =
-            pim_if_find_vifindex_by_ifindex (up->rpf.
-                                             source_nexthop.interface->
-                                             ifindex);
-          /* Pass Current selected NH vif index to mroute download */
-          if (vif_index)
-            pim_scan_individual_oil (up->channel_oil, vif_index);
-          else
-            {
-              if (PIM_DEBUG_ZEBRA)
-                zlog_debug ("%s: NHT upstream %s channel_oil IIF %s vif_index is not valid",
-                      __PRETTY_FUNCTION__, up->sg_str,
-                      up->rpf.source_nexthop.interface->name);
-            }
-        }
-
-    } /* for (pnc->upstream_list) */
+    }                           /* for (pnc->upstream_list) */
 
   for (ALL_LIST_ELEMENTS_RO (vrf_iflist (VRF_DEFAULT), ifnode, ifp))
     if (ifp->info)
@@ -426,10 +471,209 @@ pim_update_upstream_nh (struct pim_nexthop_cache *pnc)
   return 0;
 }
 
-/*
- * This API is used to parse Registered address nexthop update
- * coming from Zebra
- */
+/*
+ * Hash a (source, group) pair into the 32-bit value used to choose among
+ * ECMP nexthops.
+ *
+ * src - address being resolved; a NULL src yields 0 without hashing
+ * grp - multicast group; optional, contributes 0 when NULL
+ *
+ * Only AF_INET sources contribute their address words; for any other
+ * family both words stay 0.  A source address of 0 is replaced by 1 before
+ * hashing.  The result is jhash_2words (group, source, 101).
+ */
+uint32_t
+pim_compute_ecmp_hash (struct prefix * src, struct prefix * grp)
+{
+  uint32_t src_word = 0;
+  uint32_t grp_word = 0;
+  uint32_t hash_val;
+
+  if (src == NULL)
+    return 0;
+
+  if (src->family == AF_INET)
+    {
+      src_word = src->u.prefix4.s_addr;
+      if (src_word == 0)
+        src_word = 1;
+      if (grp != NULL)
+        grp_word = grp->u.prefix4.s_addr;
+    }
+
+  hash_val = jhash_2words (grp_word, src_word, 101);
+
+  if (PIM_DEBUG_TRACE)
+    {
+      char src_buf[PREFIX2STR_BUFFER];
+      char grp_buf[PREFIX2STR_BUFFER];
+
+      prefix2str (src, src_buf, sizeof (src_buf));
+      /* grp_buf is only filled, and only printed, when a group was given. */
+      if (grp != NULL)
+        prefix2str (grp, grp_buf, sizeof (grp_buf));
+      zlog_debug ("%s: addr %s %s hash_val %u", __PRETTY_FUNCTION__, src_buf,
+                  grp != NULL ? grp_buf : "", hash_val);
+    }
+
+  return hash_val;
+}
+
+/*
+ * Choose the RPF nexthop for (src, grp) from the nexthops cached in pnc
+ * and store the choice in *nexthop.
+ *
+ * pnc             - cached nexthop entry; must have nexthop_num != 0
+ * nexthop         - in/out: its current contents are inspected for the
+ *                   "stay on the current path" check and overwritten when
+ *                   a new path is selected
+ * src             - address being resolved; dereferenced unconditionally
+ * grp             - multicast group; optional, may be NULL
+ * neighbor_needed - when non-zero, a nexthop on an interface that is not
+ *                   directly connected to src is accepted only if a PIM
+ *                   neighbor is found for it or the interface is a loopback
+ *
+ * With qpim_ecmp_enable set, the target index is
+ * pim_compute_ecmp_hash (src, grp) % pnc->nexthop_num; otherwise it is 0.
+ * Whenever the nexthop at the target index is unusable, the target moves
+ * to the next list position.
+ *
+ * Returns -1 when pnc or nexthop is NULL or pnc has no nexthops, and 0
+ * otherwise.  Note that 0 is also returned when no candidate was selected,
+ * in which case *nexthop is left unmodified.
+ */
+int
+pim_ecmp_nexthop_search (struct pim_nexthop_cache *pnc,
+                         struct pim_nexthop *nexthop, struct prefix *src,
+                         struct prefix *grp, int neighbor_needed)
+{
+  struct pim_neighbor *nbr = NULL;
+  struct nexthop *nh_node = NULL;
+  ifindex_t first_ifindex;
+  struct interface *ifp = NULL;
+  uint32_t hash_val = 0, mod_val = 0;
+  uint8_t nh_iter = 0, found = 0;
+
+  if (!pnc || !pnc->nexthop_num || !nexthop)
+    return -1;
+
+  if (qpim_ecmp_enable)
+    {
+      //User configured knob to explicitly switch to new path.
+      if (qpim_ecmp_rebalance_enable == 0)
+        {
+          //Current Nexthop is VALID then stay on the current path.
+          if (nexthop->interface && nexthop->interface->info &&
+              nexthop->mrib_nexthop_addr.u.prefix4.s_addr !=
+              PIM_NET_INADDR_ANY)
+            {
+              if (neighbor_needed
+                  && !pim_if_connected_to_source (nexthop->interface,
+                                                  src->u.prefix4))
+                {
+                  nbr = pim_neighbor_find (nexthop->interface,
+                                           nexthop->mrib_nexthop_addr.
+                                           u.prefix4);
+                  if (!nbr && !if_is_loopback (nexthop->interface))
+                    {
+                      if (PIM_DEBUG_TRACE)
+                        zlog_debug ("%s: current nexthop does not have nbr ",
+                                    __PRETTY_FUNCTION__);
+                    }
+                  else
+                    {
+                      if (PIM_DEBUG_TRACE)
+                        {
+                          char src_str[INET_ADDRSTRLEN];
+                          char grp_str[INET_ADDRSTRLEN] = "";
+                          pim_inet4_dump ("<addr?>", src->u.prefix4, src_str,
+                                          sizeof (src_str));
+                          /* grp is optional everywhere else in this
+                             function, so do not dereference it blindly. */
+                          if (grp)
+                            pim_inet4_dump ("<addr?>", grp->u.prefix4,
+                                            grp_str, sizeof (grp_str));
+                          zlog_debug
+                            ("%s: %s %s current nexthop %d is valid, not choosing new path",
+                             __PRETTY_FUNCTION__, src_str, grp_str,
+                             nexthop->interface->ifindex);
+                        }
+                      return 0;
+                    }
+                }
+            }
+        }
+      //PIM ECMP flag is enable then choose ECMP path.
+      hash_val = pim_compute_ecmp_hash (src, grp);
+      mod_val = hash_val % pnc->nexthop_num;
+      if (PIM_DEBUG_TRACE)
+        zlog_debug ("%s: hash_val %u mod_val %u ",
+                    __PRETTY_FUNCTION__, hash_val, mod_val);
+    }
+
+  for (nh_node = pnc->nexthop; nh_node && (found == 0);
+       nh_node = nh_node->next)
+    {
+      /* Forget any neighbor found for an earlier candidate so that it can
+         never be recorded against a different nexthop below. */
+      nbr = NULL;
+      first_ifindex = nh_node->ifindex;
+      ifp = if_lookup_by_index (first_ifindex, VRF_DEFAULT);
+      if (!ifp)
+        {
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char addr_str[INET_ADDRSTRLEN];
+              pim_inet4_dump ("<addr?>", src->u.prefix4, addr_str,
+                              sizeof (addr_str));
+              zlog_debug
+                ("%s %s: could not find interface for ifindex %d (address %s)",
+                 __FILE__, __PRETTY_FUNCTION__, first_ifindex, addr_str);
+            }
+          if (nh_iter == mod_val)
+            mod_val++;          //Select next path
+          nh_iter++;
+          continue;
+        }
+      if (!ifp->info)
+        {
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char addr_str[INET_ADDRSTRLEN];
+              pim_inet4_dump ("<addr?>", src->u.prefix4, addr_str,
+                              sizeof (addr_str));
+              zlog_debug
+                ("%s: multicast not enabled on input interface %s (ifindex=%d, RPF for source %s)",
+                 __PRETTY_FUNCTION__, ifp->name, first_ifindex, addr_str);
+            }
+          if (nh_iter == mod_val)
+            mod_val++;          //Select next path
+          nh_iter++;
+          continue;
+        }
+
+      if (neighbor_needed
+          && !pim_if_connected_to_source (ifp, src->u.prefix4))
+        {
+          nbr = pim_neighbor_find (ifp, nh_node->gate.ipv4);
+          if (PIM_DEBUG_PIM_TRACE_DETAIL)
+            zlog_debug ("ifp name: %s, pim nbr: %p", ifp->name, nbr);
+          if (!nbr && !if_is_loopback (ifp))
+            {
+              if (PIM_DEBUG_ZEBRA)
+                zlog_debug
+                  ("%s: pim nbr not found on input interface %s",
+                   __PRETTY_FUNCTION__, ifp->name);
+              if (nh_iter == mod_val)
+                mod_val++;      //Select next path
+              nh_iter++;
+              continue;
+            }
+        }
+
+      if (nh_iter == mod_val)
+        {
+          nexthop->interface                    = ifp;
+          nexthop->mrib_nexthop_addr.family     = AF_INET;
+          nexthop->mrib_nexthop_addr.prefixlen  = IPV4_MAX_BITLEN;
+          nexthop->mrib_nexthop_addr.u.prefix4  = nh_node->gate.ipv4;
+          nexthop->mrib_metric_preference       = pnc->distance;
+          nexthop->mrib_route_metric            = pnc->metric;
+          nexthop->last_lookup                  = src->u.prefix4;
+          nexthop->last_lookup_time             = pim_time_monotonic_usec ();
+          nexthop->nbr                          = nbr;
+          found                                 = 1;
+
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char buf[NEXTHOP_STRLEN];
+              char buf2[PREFIX2STR_BUFFER];
+              char buf3[PREFIX2STR_BUFFER];
+              char buf4[PREFIX2STR_BUFFER];
+              pim_inet4_dump ("<src?>", src->u.prefix4, buf2, sizeof (buf2));
+              if (grp)
+                pim_inet4_dump ("<src?>", grp->u.prefix4, buf3,
+                                sizeof (buf3));
+              pim_inet4_dump ("<rpf?>",
+                              nexthop->mrib_nexthop_addr.u.prefix4, buf4,
+                              sizeof (buf4));
+              snprintf (buf, sizeof (buf), "%s if %u",
+                        inet_ntoa (nh_node->gate.ipv4), nh_node->ifindex);
+              zlog_debug
+                ("%s: NHT %s %s selected nhop interface %s nhop %s (%s) mod_val:%u iter:%d ecmp_enable:%d",
+                 __PRETTY_FUNCTION__, buf2, grp ? buf3 : " ", ifp->name,
+                 buf, buf4, mod_val, nh_iter, qpim_ecmp_enable);
+            }
+        }
+      nh_iter++;
+
+    }
+
+  return 0;
+}
+
+/* This API is used to parse Registered address nexthop update coming from Zebra */
 int
 pim_parse_nexthop_update (int command, struct zclient *zclient,
                           zebra_size_t length, vrf_id_t vrf_id)
@@ -437,7 +681,6 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
   struct stream *s;
   struct prefix p;
   struct nexthop *nexthop;
-  struct nexthop *oldnh;
   struct nexthop *nhlist_head = NULL;
   struct nexthop *nhlist_tail = NULL;
   uint32_t metric, distance;
@@ -476,8 +719,9 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
             {
               char buf[PREFIX2STR_BUFFER];
               prefix2str (&rpf.rpf_addr, buf, sizeof (buf));
-              zlog_debug ("%s: NHT addr %s is not in local cached DB.",
-                          __PRETTY_FUNCTION__, buf);
+              zlog_debug
+                ("%s: Skipping NHT update, addr %s is not in local cached DB.",
+                 __PRETTY_FUNCTION__, buf);
             }
           return 0;
         }
@@ -490,7 +734,7 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
       return 0;
     }
 
-  pnc->last_update = pim_time_monotonic_sec ();
+  pnc->last_update = pim_time_monotonic_usec ();
   distance = stream_getc (s);
   metric = stream_getl (s);
   nexthop_num = stream_getc (s);
@@ -499,17 +743,15 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
     {
       char buf[PREFIX2STR_BUFFER];
       prefix2str (&p, buf, sizeof (buf));
-      zlog_debug ("%s: NHT Update for %s nexthop_num %d vrf:%d upcount %d rpcount %d",
-                  __PRETTY_FUNCTION__, buf, nexthop_num, vrf_id,
-                  listcount (pnc->upstream_list), listcount (pnc->rp_list));
+      zlog_debug
+        ("%s: NHT Update for %s nexthop_num %d vrf:%d upcount %d rpcount %d",
+         __PRETTY_FUNCTION__, buf, nexthop_num, vrf_id,
+         listcount (pnc->upstream_list), listcount (pnc->rp_list));
     }
 
   if (nexthop_num)
     {
-      pnc->flags |= PIM_NEXTHOP_VALID;
-      pnc->distance = distance;
-      pnc->metric = metric;
-      pnc->nexthop_num = nexthop_num;
+      pnc->nexthop_num = 0;     //Only increment for pim enabled rpf.
 
       for (i = 0; i < nexthop_num; i++)
         {
@@ -539,7 +781,34 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
                                       (nexthop->ifindex, VRF_DEFAULT));
               /* Overwrite with Nbr address as NH addr */
               if (nbr)
-                nexthop->gate.ipv4 = nbr->source_addr;
+                {
+                  nexthop->gate.ipv4 = nbr->source_addr;
+                  if (PIM_DEBUG_TRACE)
+                    {
+                      char str[INET_ADDRSTRLEN];
+                      pim_inet4_dump ("<nht_nbr?>", nbr->source_addr, str,
+                                      sizeof (str));
+                      zlog_debug
+                        ("%s: NHT using pim nbr addr %s ifindex %d as rpf",
+                         __PRETTY_FUNCTION__, str, nexthop->ifindex);
+                    }
+                }
+              else
+                {
+                  if (PIM_DEBUG_TRACE)
+                    {
+                      struct interface *ifp1 =
+                        if_lookup_by_index (nexthop->ifindex,
+                                            VRF_DEFAULT);
+                      struct pim_interface *pim_ifp = ifp1->info;
+                      zlog_debug
+                        ("%s: NHT pim nbr not found on interface %s nbr count:%d ",
+                         __PRETTY_FUNCTION__, ifp1->name,
+                         pim_ifp->pim_neighbor_list->count);
+                    }
+                  //Mark nexthop address to 0 until PIM Nbr is resolved.
+                  nexthop->gate.ipv4.s_addr = PIM_NET_INADDR_ANY;
+                }
 
               break;
             default:
@@ -562,9 +831,10 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
               if (PIM_DEBUG_ZEBRA)
                 {
                   char buf[NEXTHOP_STRLEN];
-                  zlog_debug("%s: could not find interface for ifindex %d (addr %s)",
-                         __PRETTY_FUNCTION__,
-                         nexthop->ifindex, nexthop2str (nexthop, buf, sizeof (buf)));
+                  zlog_debug
+                    ("%s: could not find interface for ifindex %d (addr %s)",
+                     __PRETTY_FUNCTION__, nexthop->ifindex,
+                     nexthop2str (nexthop, buf, sizeof (buf)));
                 }
               nexthop_free (nexthop);
               continue;
@@ -594,14 +864,18 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
               nhlist_tail = nexthop;
               nhlist_head = nexthop;
             }
-
-          for (oldnh = pnc->nexthop; oldnh; oldnh = oldnh->next)
-            if (nexthop_same_no_recurse (oldnh, nexthop))
-              break;
+          //Only keep track of nexthops which are PIM enabled.
+          pnc->nexthop_num++;
         }
       /* Reset existing pnc->nexthop before assigning new list */
       nexthops_free (pnc->nexthop);
       pnc->nexthop = nhlist_head;
+      if (pnc->nexthop_num)
+        {
+          pnc->flags |= PIM_NEXTHOP_VALID;
+          pnc->distance = distance;
+          pnc->metric = metric;
+        }
     }
   else
     {
@@ -611,6 +885,16 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
       pnc->nexthop = NULL;
     }
 
+  if (PIM_DEBUG_TRACE)
+    {
+      char buf[PREFIX2STR_BUFFER];
+      prefix2str (&p, buf, sizeof (buf));
+      zlog_debug
+        ("%s: NHT Update for %s nexthop_num:%d pim nexthop_num %d vrf:%d up %d rp %d",
+         __PRETTY_FUNCTION__, buf, nexthop_num, pnc->nexthop_num, vrf_id,
+         listcount (pnc->upstream_list), listcount (pnc->rp_list));
+    }
+
   pim_rpf_set_refresh_time ();
 
   if (listcount (pnc->rp_list))
@@ -620,3 +904,142 @@ pim_parse_nexthop_update (int command, struct zclient *zclient,
 
   return 0;
 }
+
+/*
+ * Look up the nexthop(s) for 'addr' with zclient_lookup_nexthop () and
+ * record the selected one in 'nexthop'.
+ *
+ * nexthop         - updated in place (interface, nexthop address, metric,
+ *                   preference, neighbor, last lookup address and time)
+ *                   when a path is selected; left untouched otherwise.
+ * addr            - address whose nexthop is being resolved.
+ * src, grp        - fed to pim_compute_ecmp_hash (); only consulted when
+ *                   qpim_ecmp_enable is set.
+ * neighbor_needed - when non-zero, a candidate whose interface is not
+ *                   connected to 'addr' is only acceptable if a PIM
+ *                   neighbor exists at the nexthop address, unless the
+ *                   interface is a loopback.
+ *
+ * With ECMP disabled the first usable candidate is chosen.  With ECMP
+ * enabled the candidate at index (hash % number of paths) is chosen; if
+ * that candidate is unusable the target slides to the next index.
+ *
+ * Returns 0 when a nexthop was selected, -1 otherwise.
+ */
+int
+pim_ecmp_nexthop_lookup (struct pim_nexthop *nexthop, struct in_addr addr,
+                         struct prefix *src, struct prefix *grp,
+                         int neighbor_needed)
+{
+  struct pim_zlookup_nexthop nexthop_tab[MULTIPATH_NUM];
+  struct pim_neighbor *nbr = NULL;
+  int num_ifindex;
+  struct interface *ifp;
+  int first_ifindex;
+  int found = 0;
+  uint8_t i = 0;
+  uint32_t hash_val = 0, mod_val = 0;
+
+  if (PIM_DEBUG_TRACE)
+    {
+      char addr_str[INET_ADDRSTRLEN];
+      pim_inet4_dump ("<addr?>", addr, addr_str, sizeof (addr_str));
+      zlog_debug ("%s: Looking up: %s, last lookup time: %lld",
+                  __PRETTY_FUNCTION__, addr_str, nexthop->last_lookup_time);
+    }
+
+  memset (nexthop_tab, 0, sizeof (nexthop_tab));
+  num_ifindex =
+    zclient_lookup_nexthop (nexthop_tab, MULTIPATH_NUM, addr,
+                            PIM_NEXTHOP_LOOKUP_MAX);
+  if (num_ifindex < 1)
+    {
+      char addr_str[INET_ADDRSTRLEN];
+      pim_inet4_dump ("<addr?>", addr, addr_str, sizeof (addr_str));
+      zlog_warn ("%s %s: could not find nexthop ifindex for address %s",
+                 __FILE__, __PRETTY_FUNCTION__, addr_str);
+      return -1;
+    }
+
+  //If PIM ECMP enable then choose ECMP path
+  if (qpim_ecmp_enable)
+    {
+      hash_val = pim_compute_ecmp_hash (src, grp);
+      mod_val = hash_val % num_ifindex;
+      if (PIM_DEBUG_TRACE)
+        zlog_debug ("%s: hash_val %u mod_val %u ",
+                    __PRETTY_FUNCTION__, hash_val, mod_val);
+    }
+
+  while (!found && (i < num_ifindex))
+    {
+      first_ifindex = nexthop_tab[i].ifindex;
+
+      /* Each candidate starts without a neighbor: a neighbor found for an
+         earlier, non-selected candidate must not be reported for this one. */
+      nbr = NULL;
+
+      ifp = if_lookup_by_index (first_ifindex, VRF_DEFAULT);
+      if (!ifp)
+        {
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char addr_str[INET_ADDRSTRLEN];
+              pim_inet4_dump ("<addr?>", addr, addr_str, sizeof (addr_str));
+              zlog_debug
+                ("%s %s: could not find interface for ifindex %d (address %s)",
+                 __FILE__, __PRETTY_FUNCTION__, first_ifindex, addr_str);
+            }
+          /* The targeted slot is unusable: move the target to the next one. */
+          if (i == mod_val)
+            mod_val++;
+          i++;
+          continue;
+        }
+
+      if (!ifp->info)
+        {
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char addr_str[INET_ADDRSTRLEN];
+              pim_inet4_dump ("<addr?>", addr, addr_str, sizeof (addr_str));
+              zlog_debug
+                ("%s: multicast not enabled on input interface %s (ifindex=%d, RPF for source %s)",
+                 __PRETTY_FUNCTION__, ifp->name, first_ifindex, addr_str);
+            }
+          if (i == mod_val)
+            mod_val++;
+          i++;
+          continue;
+        }
+
+      if (neighbor_needed && !pim_if_connected_to_source (ifp, addr))
+        {
+          nbr =
+            pim_neighbor_find (ifp, nexthop_tab[i].nexthop_addr.u.prefix4);
+          if (PIM_DEBUG_PIM_TRACE_DETAIL)
+            zlog_debug ("ifp name: %s, pim nbr: %p", ifp->name, nbr);
+          if (!nbr && !if_is_loopback (ifp))
+            {
+              if (PIM_DEBUG_ZEBRA)
+                {
+                  char addr_str[INET_ADDRSTRLEN];
+                  pim_inet4_dump ("<addr?>", addr, addr_str,
+                                  sizeof (addr_str));
+                  zlog_debug
+                    ("%s: NBR not found on input interface %s (RPF for source %s)",
+                     __PRETTY_FUNCTION__, ifp->name, addr_str);
+                }
+              if (i == mod_val)
+                mod_val++;
+              i++;
+              continue;
+            }
+        }
+
+      /* Usable candidate: take it only if it is the targeted slot. */
+      if (i == mod_val)
+        {
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char nexthop_str[PREFIX_STRLEN];
+              char addr_str[INET_ADDRSTRLEN];
+              pim_addr_dump ("<nexthop?>", &nexthop_tab[i].nexthop_addr,
+                             nexthop_str, sizeof (nexthop_str));
+              pim_inet4_dump ("<addr?>", addr, addr_str, sizeof (addr_str));
+              zlog_debug
+                ("%s %s: found nexthop %s for addr %s interface %s metric=%d pref=%d",
+                 __FILE__, __PRETTY_FUNCTION__, nexthop_str, addr_str,
+                 ifp->name, nexthop_tab[i].route_metric,
+                 nexthop_tab[i].protocol_distance);
+            }
+          /* update nexthop data */
+          nexthop->interface                = ifp;
+          nexthop->mrib_nexthop_addr        = nexthop_tab[i].nexthop_addr;
+          nexthop->mrib_metric_preference   = nexthop_tab[i].protocol_distance;
+          nexthop->mrib_route_metric        = nexthop_tab[i].route_metric;
+          nexthop->last_lookup              = addr;
+          nexthop->last_lookup_time         = pim_time_monotonic_usec();
+          nexthop->nbr                      = nbr;
+          found                             = 1;
+        }
+      i++;
+    }
+
+  return found ? 0 : -1;
+}
index 5348ec3704a6a875f39fff1e06fca05017c3fdfb..b4b2d91e47a550fd0a0b46484bc454ceabc1d109 100644 (file)
@@ -51,10 +51,19 @@ struct pim_nexthop_cache
 int pim_parse_nexthop_update (int command, struct zclient *zclient,
                               zebra_size_t length, vrf_id_t vrf_id);
 int pim_find_or_track_nexthop (struct prefix *addr, struct pim_upstream *up,
-                               struct rp_info *rp);
+                               struct rp_info *rp, struct pim_nexthop_cache *out_pnc);
 void pim_delete_tracked_nexthop (struct prefix *addr, struct pim_upstream *up,
                                  struct rp_info *rp);
 struct pim_nexthop_cache *pim_nexthop_cache_add (struct pim_rpf *rpf_addr);
 struct pim_nexthop_cache *pim_nexthop_cache_find (struct pim_rpf *rpf);
-
+uint32_t pim_compute_ecmp_hash (struct prefix *src, struct prefix *grp);
+int pim_ecmp_nexthop_search (struct pim_nexthop_cache * pnc,
+                         struct pim_nexthop *nexthop, struct prefix *src,
+                         struct prefix *grp, int neighbor_needed);
+int pim_ecmp_nexthop_lookup (struct pim_nexthop *nexthop, struct in_addr addr,
+                         struct prefix *src, struct prefix *grp,
+                         int neighbor_needed);
+void pim_sendmsg_zebra_rnh (struct zclient *zclient, struct pim_nexthop_cache *pnc,
+                          int command);
+void pim_resolve_upstream_nh (struct prefix *nht_p);
 #endif
index 2701079053d147b4b42d9a61219663a46401a047..2d4aa3febc40c7a8d401f25af5349857faec230a 100644 (file)
@@ -289,10 +289,10 @@ pim_channel_del_oif (struct channel_oil *channel_oil,
       char source_str[INET_ADDRSTRLEN];
       pim_inet4_dump("<group?>", channel_oil->oil.mfcc_mcastgrp, group_str, sizeof(group_str));
       pim_inet4_dump("<source?>", channel_oil->oil.mfcc_origin, source_str, sizeof(source_str));
-      zlog_debug("%s %s: (S,G)=(%s,%s): proto_mask=%u OIF=%s vif_index=%d",
+      zlog_debug("%s %s: (S,G)=(%s,%s): proto_mask=%u IIF:%d OIF=%s vif_index=%d",
                 __FILE__, __PRETTY_FUNCTION__,
                 source_str, group_str,
-                proto_mask, oif->name, pim_ifp->mroute_vif_index);
+                proto_mask, channel_oil->oil.mfcc_parent ,oif->name, pim_ifp->mroute_vif_index);
     }
 
   return 0;
index 65a3e8714f87481060e7636e6dc5c1f0e380f705..12f8dd53f268683cd52af94f2888a0399d8f8f32 100644 (file)
@@ -187,6 +187,15 @@ pim_register_send (const uint8_t *buf, int buf_size, struct in_addr src, struct
     return;
   }
 
+  if (PIM_DEBUG_PIM_REG)
+    {
+      char rp_str[INET_ADDRSTRLEN];
+      strcpy (rp_str, inet_ntoa (rpg->rpf_addr.u.prefix4));
+      zlog_debug ("%s: Sending %s %sRegister Packet to %s on %s",
+              __PRETTY_FUNCTION__, up->sg_str,
+              null_register ? "NULL " : "", rp_str, ifp->name);
+    }
+
   memset(buffer, 0, 10000);
   b1 = buffer + PIM_MSG_HEADER_LEN;
   *b1 |= null_register << 6;
index 78bbd14405566540d67434e2c9e7aa33fef64711..ec31069eb5aad118eb5e3e47d88d01f65c45ba4a 100644 (file)
@@ -30,6 +30,7 @@
 #include "vty.h"
 #include "vrf.h"
 #include "plist.h"
+#include "nexthop.h"
 
 #include "pimd.h"
 #include "pim_vty.h"
@@ -280,7 +281,7 @@ pim_rp_check_interfaces (struct rp_info *rp_info)
 int
 pim_rp_new (const char *rp, const char *group_range, const char *plist)
 {
-  int result;
+  int result, ret = 0;
   struct rp_info *rp_info;
   struct rp_info *rp_all;
   struct prefix group_all;
@@ -288,6 +289,7 @@ pim_rp_new (const char *rp, const char *group_range, const char *plist)
   struct rp_info *tmp_rp_info;
   char buffer[BUFSIZ];
   struct prefix nht_p;
+  struct pim_nexthop_cache pnc;
 
   rp_info = XCALLOC (MTYPE_PIM_RP, sizeof (*rp_info));
   if (!rp_info)
@@ -387,21 +389,31 @@ pim_rp_new (const char *rp, const char *group_range, const char *plist)
           /* Register addr with Zebra NHT */
           nht_p.family = AF_INET;
           nht_p.prefixlen = IPV4_MAX_BITLEN;
-          nht_p.u.prefix4 = rp_all->rp.rpf_addr.u.prefix4;
+          nht_p.u.prefix4 = rp_all->rp.rpf_addr.u.prefix4;      //RP address
           if (PIM_DEBUG_PIM_TRACE)
             {
               char buf[PREFIX2STR_BUFFER];
+              char buf1[PREFIX2STR_BUFFER];
               prefix2str (&nht_p, buf, sizeof (buf));
-              zlog_debug ("%s: NHT Register rp_all addr %s with NHT ",
-                        __PRETTY_FUNCTION__, buf);
+              prefix2str (&rp_all->group, buf1, sizeof (buf1));
+              zlog_debug ("%s: NHT Register rp_all addr %s grp %s ",
+                          __PRETTY_FUNCTION__, buf, buf1);
+            }
+          memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+          if ((ret =
+               pim_find_or_track_nexthop (&nht_p, NULL, rp_all, &pnc)) == 1)
+            {
+              //Compute PIM RPF using Cached nexthop
+              pim_ecmp_nexthop_search (&pnc, &rp_all->rp.source_nexthop,
+                                       &nht_p, &rp_all->group, 1);
+            }
+          else
+            {
+              if (pim_nexthop_lookup (&rp_all->rp.source_nexthop, rp_all->rp.rpf_addr.u.prefix4, 1) != 0)
+                return PIM_RP_NO_PATH;
             }
-          pim_find_or_track_nexthop (&nht_p, NULL, rp_all);
-
-          if (pim_nexthop_lookup (&rp_all->rp.source_nexthop, rp_all->rp.rpf_addr.u.prefix4, 1) != 0)
-            return PIM_RP_NO_PATH;
-
           pim_rp_check_interfaces (rp_all);
-         pim_rp_refresh_group_to_rp_mapping();
+          pim_rp_refresh_group_to_rp_mapping ();
           return PIM_SUCCESS;
         }
 
@@ -451,13 +463,25 @@ pim_rp_new (const char *rp, const char *group_range, const char *plist)
   if (PIM_DEBUG_PIM_TRACE)
     {
       char buf[PREFIX2STR_BUFFER];
+      char buf1[PREFIX2STR_BUFFER];
       prefix2str (&nht_p, buf, sizeof (buf));
-      zlog_debug ("%s: NHT Register RP addr %s with Zebra ", __PRETTY_FUNCTION__, buf);
+      prefix2str (&rp_info->group, buf1, sizeof (buf1));
+      zlog_debug ("%s: NHT Register RP addr %s grp %s with Zebra ",
+                  __PRETTY_FUNCTION__, buf, buf1);
     }
-  pim_find_or_track_nexthop (&nht_p, NULL, rp_info);
 
-  if (pim_nexthop_lookup (&rp_info->rp.source_nexthop, rp_info->rp.rpf_addr.u.prefix4, 1) != 0)
-    return PIM_RP_NO_PATH;
+  memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+  if ((ret = pim_find_or_track_nexthop (&nht_p, NULL, rp_info, &pnc)) == 1)
+    {
+      //Compute PIM RPF using Cached nexthop
+      pim_ecmp_nexthop_search (&pnc, &rp_info->rp.source_nexthop,
+                               &nht_p, &rp_info->group, 1);
+    }
+  else
+    {
+      if (pim_nexthop_lookup (&rp_info->rp.source_nexthop, rp_info->rp.rpf_addr.u.prefix4, 1) != 0)
+        return PIM_RP_NO_PATH;
+    }
 
   pim_rp_check_interfaces (rp_info);
   pim_rp_refresh_group_to_rp_mapping ();
@@ -509,7 +533,8 @@ pim_rp_del (const char *rp, const char *group_range, const char *plist)
     {
       char buf[PREFIX2STR_BUFFER];
       prefix2str (&nht_p, buf, sizeof (buf));
-      zlog_debug ("%s: Deregister RP addr %s with NHT ", __PRETTY_FUNCTION__, buf);
+      zlog_debug ("%s: Deregister RP addr %s with Zebra ", __PRETTY_FUNCTION__,
+                  buf);
     }
   pim_delete_tracked_nexthop (&nht_p, NULL, rp_info);
 
@@ -535,17 +560,39 @@ pim_rp_setup (void)
   struct listnode *node;
   struct rp_info *rp_info;
   int ret = 0;
+  struct prefix nht_p;
+  struct pim_nexthop_cache pnc;
 
   for (ALL_LIST_ELEMENTS_RO (qpim_rp_list, node, rp_info))
     {
       if (rp_info->rp.rpf_addr.u.prefix4.s_addr == INADDR_NONE)
         continue;
 
-      if (pim_nexthop_lookup (&rp_info->rp.source_nexthop, rp_info->rp.rpf_addr.u.prefix4, 1) != 0)
+      nht_p.family = AF_INET;
+      nht_p.prefixlen = IPV4_MAX_BITLEN;
+      nht_p.u.prefix4 = rp_info->rp.rpf_addr.u.prefix4;
+      memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+      if ((pim_find_or_track_nexthop (&nht_p, NULL, rp_info, &pnc)) == 1)
         {
-         if (PIM_DEBUG_PIM_TRACE)
-           zlog_debug ("Unable to lookup nexthop for rp specified");
-          ret++;
+          //Compute PIM RPF using Cached nexthop
+          pim_ecmp_nexthop_search (&pnc, &rp_info->rp.source_nexthop,
+                                   &nht_p, &rp_info->group, 1);
+        }
+      else
+        {
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char buf[PREFIX2STR_BUFFER];
+              prefix2str (&nht_p, buf, sizeof (buf));
+              zlog_debug ("%s: NHT Local Nexthop not found for RP %s ",
+                          __PRETTY_FUNCTION__, buf);
+            }
+          if (pim_nexthop_lookup (&rp_info->rp.source_nexthop, rp_info->rp.rpf_addr.u.prefix4, 1) != 0)
+            {
+              if (PIM_DEBUG_PIM_TRACE)
+                zlog_debug ("Unable to lookup nexthop for rp specified");
+              ret++;
+            }
         }
     }
 
@@ -680,7 +727,43 @@ pim_rp_g (struct in_addr group)
 
   if (rp_info)
     {
-      pim_nexthop_lookup(&rp_info->rp.source_nexthop, rp_info->rp.rpf_addr.u.prefix4, 1);
+      int ret = 0;
+      struct prefix nht_p;
+      struct pim_nexthop_cache pnc;
+      /* Register addr with Zebra NHT */
+      nht_p.family = AF_INET;
+      nht_p.prefixlen = IPV4_MAX_BITLEN;
+      nht_p.u.prefix4 = rp_info->rp.rpf_addr.u.prefix4;
+      if (PIM_DEBUG_PIM_TRACE)
+        {
+          char buf[PREFIX2STR_BUFFER];
+          char buf1[PREFIX2STR_BUFFER];
+          prefix2str (&nht_p, buf, sizeof (buf));
+          prefix2str (&rp_info->group, buf1, sizeof (buf1));
+          zlog_debug ("%s: NHT Register RP addr %s grp %s with Zebra ",
+                      __PRETTY_FUNCTION__, buf, buf1);
+        }
+      memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+      if ((ret = pim_find_or_track_nexthop (&nht_p, NULL, rp_info, &pnc)) == 1)
+        {
+          //Compute PIM RPF using Cached nexthop
+          pim_ecmp_nexthop_search (&pnc, &rp_info->rp.source_nexthop,
+                                   &nht_p, &rp_info->group, 1);
+        }
+      else
+        {
+          if (PIM_DEBUG_ZEBRA)
+            {
+              char buf[PREFIX2STR_BUFFER];
+              char buf1[PREFIX2STR_BUFFER];
+              prefix2str (&nht_p, buf, sizeof (buf));
+              prefix2str (&g, buf1, sizeof (buf1));
+              zlog_debug ("%s: NHT nexthop cache not found for RP %s grp %s",
+                          __PRETTY_FUNCTION__, buf, buf1);
+            }
+          pim_rpf_set_refresh_time ();
+          pim_nexthop_lookup (&rp_info->rp.source_nexthop, rp_info->rp.rpf_addr.u.prefix4, 1);
+        }
       return (&rp_info->rp);
     }
 
@@ -862,3 +945,54 @@ pim_rp_show_information (struct vty *vty, u_char uj)
     json_object_free(json);
   }
 }
+
+/*
+ * For every RP in qpim_rp_list that has an address set, fetch its tracked
+ * nexthop cache entry and fill in each cached nexthop whose gateway address
+ * is still 0 with the source address of the PIM neighbor returned by
+ * pim_neighbor_find_if () for that nexthop's interface.
+ */
+void
+pim_resolve_rp_nh (void)
+{
+  struct listnode *rp_node = NULL;
+  struct rp_info *rp_info = NULL;
+  struct nexthop *nh = NULL;
+  struct pim_neighbor *nbr = NULL;
+  struct pim_nexthop_cache pnc;
+  struct prefix nht_p;
+
+  for (ALL_LIST_ELEMENTS_RO (qpim_rp_list, rp_node, rp_info))
+    {
+      /* RP entries without an address have nothing to resolve. */
+      if (rp_info->rp.rpf_addr.u.prefix4.s_addr == INADDR_NONE)
+        continue;
+
+      /* Host prefix of the RP address, used as the tracking key. */
+      nht_p.family = AF_INET;
+      nht_p.prefixlen = IPV4_MAX_BITLEN;
+      nht_p.u.prefix4 = rp_info->rp.rpf_addr.u.prefix4;
+
+      memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+      if ((pim_find_or_track_nexthop (&nht_p, NULL, rp_info, &pnc)) != 1)
+        continue;
+
+      for (nh = pnc.nexthop; nh; nh = nh->next)
+        {
+          struct interface *nh_ifp;
+
+          /* Only nexthops still marked with a zero gateway need work. */
+          if (nh->gate.ipv4.s_addr != 0)
+            continue;
+
+          nh_ifp = if_lookup_by_index (nh->ifindex, VRF_DEFAULT);
+          nbr = pim_neighbor_find_if (nh_ifp);
+          if (!nbr)
+            continue;
+
+          nh->gate.ipv4 = nbr->source_addr;
+          if (PIM_DEBUG_TRACE)
+            {
+              char str[PREFIX_STRLEN];
+              char str1[INET_ADDRSTRLEN];
+              pim_inet4_dump ("<nht_nbr?>", nbr->source_addr,
+                              str1, sizeof (str1));
+              pim_addr_dump ("<nht_addr?>", &nht_p, str,
+                             sizeof (str));
+              zlog_debug
+                ("%s: addr %s new nexthop addr %s ifindex %d ",
+                 __PRETTY_FUNCTION__, str, str1,
+                 nh->ifindex);
+            }
+        }
+    }
+}
index 84ab9be4824978b09a9687d0a539c20d961813c1..df18c998d69a4e760f5f7275909ba42e253e7bcc 100644 (file)
@@ -61,6 +61,6 @@ struct pim_rpf *pim_rp_g (struct in_addr group);
 #define RP(G)       pim_rp_g ((G))
 
 void pim_rp_show_information (struct vty *vty, u_char uj);
-
+void pim_resolve_rp_nh (void);
 int pim_rp_list_cmp (void *v1, void *v2);
 #endif
index 400048738a6a32da0e8c9abbaf2898866cae9354..0f5fab0d93596fa81721638322e02ba5aa14578e 100644 (file)
@@ -67,11 +67,14 @@ int pim_nexthop_lookup(struct pim_nexthop *nexthop, struct in_addr addr, int nei
        {
          char addr_str[INET_ADDRSTRLEN];
          pim_inet4_dump("<addr?>", addr, addr_str, sizeof(addr_str));
-         zlog_debug ("%s: Using last lookup for %s at %lld, %lld",
+          char nexthop_str[PREFIX_STRLEN];
+          pim_addr_dump("<nexthop?>", &nexthop->mrib_nexthop_addr,
+                            nexthop_str, sizeof(nexthop_str));
+          zlog_debug ("%s: Using last lookup for %s at %lld, %lld addr%s",
                      __PRETTY_FUNCTION__,
                      addr_str,
                      nexthop->last_lookup_time,
-                     last_route_change_time);
+                     last_route_change_time, nexthop_str);
        }
       nexthop_lookups_avoided++;
       return 0;
@@ -190,32 +193,56 @@ enum pim_rpf_result pim_rpf_update(struct pim_upstream *up, struct pim_rpf *old,
   struct pim_rpf     *rpf = &up->rpf;
   struct pim_rpf     saved;
   struct prefix     nht_p;
+  struct pim_nexthop_cache pnc;
+  int ret = 0;
+  struct prefix src, grp;
 
   saved.source_nexthop = rpf->source_nexthop;
   saved.rpf_addr = rpf->rpf_addr;
 
-  if (is_new)
+  if (is_new && PIM_DEBUG_ZEBRA)
+    {
+      char source_str[INET_ADDRSTRLEN];
+      pim_inet4_dump ("<source?>", up->upstream_addr, source_str,
+                      sizeof (source_str));
+      zlog_debug ("%s: NHT Register upstream %s addr %s with Zebra.",
+                  __PRETTY_FUNCTION__, up->sg_str, source_str);
+    }
+  /* Register addr with Zebra NHT */
+  nht_p.family = AF_INET;
+  nht_p.prefixlen = IPV4_MAX_BITLEN;
+  nht_p.u.prefix4.s_addr = up->upstream_addr.s_addr;
+
+  src.family = AF_INET;
+  src.prefixlen = IPV4_MAX_BITLEN;
+  src.u.prefix4 = up->upstream_addr;    //RP or Src address
+  grp.family = AF_INET;
+  grp.prefixlen = IPV4_MAX_BITLEN;
+  grp.u.prefix4 = up->sg.grp;
+  memset (&pnc, 0, sizeof (struct pim_nexthop_cache));
+  if ((ret = pim_find_or_track_nexthop (&nht_p, up, NULL, &pnc)) == 1)
     {
-      if (PIM_DEBUG_ZEBRA)
+      if (pnc.nexthop_num)
         {
-          char source_str[INET_ADDRSTRLEN];
-          pim_inet4_dump("<source?>", up->upstream_addr, source_str, sizeof(source_str));
-          zlog_debug ("%s: NHT Register upstream %s addr %s with Zebra.",
-                __PRETTY_FUNCTION__, up->sg_str, source_str);
+          //Compute PIM RPF using Cached nexthop
+          pim_ecmp_nexthop_search (&pnc, &up->rpf.source_nexthop,
+                                   &src, &grp,
+                                   !PIM_UPSTREAM_FLAG_TEST_FHR (up->flags) &&
+                                   !PIM_UPSTREAM_FLAG_TEST_SRC_IGMP (up->
+                                                                     flags));
         }
-      /* Register addr with Zebra NHT */
-      nht_p.family = AF_INET;
-      nht_p.prefixlen = IPV4_MAX_BITLEN;
-      nht_p.u.prefix4.s_addr = up->upstream_addr.s_addr;
-      pim_find_or_track_nexthop (&nht_p, up, NULL);
     }
-
-  if (pim_nexthop_lookup(&rpf->source_nexthop,
-                         up->upstream_addr,
-                         !PIM_UPSTREAM_FLAG_TEST_FHR (up->flags) && 
-                         !PIM_UPSTREAM_FLAG_TEST_SRC_IGMP (up->flags))) {
-    return PIM_RPF_FAILURE;
-  }
+  else
+    {
+      if (pim_ecmp_nexthop_lookup (&rpf->source_nexthop,
+                                   up->upstream_addr, &src, &grp,
+                                   !PIM_UPSTREAM_FLAG_TEST_FHR (up->flags) &&
+                                   !PIM_UPSTREAM_FLAG_TEST_SRC_IGMP (up->
+                                                                     flags)))
+        {
+          return PIM_RPF_FAILURE;
+        }
+   }
 
   rpf->rpf_addr.family = AF_INET;
   rpf->rpf_addr.u.prefix4 = pim_rpf_find_rpf_addr(up);
index 5743dac6548ad4235e68dfa6d02bf71e0d872d0f..71634ebfe546211bef2dc6ccd1d3e1b28b8bf6a7 100644 (file)
@@ -240,8 +240,8 @@ pim_upstream_del(struct pim_upstream *up, const char *name)
     {
       char buf[PREFIX2STR_BUFFER];
       prefix2str (&nht_p, buf, sizeof (buf));
-      zlog_debug ("%s: Deregister upstream %s upstream addr %s with NHT ",
-                __PRETTY_FUNCTION__, up->sg_str, buf);
+      zlog_debug ("%s: Deregister upstream %s addr %s with Zebra",
+                  __PRETTY_FUNCTION__, up->sg_str, buf);
     }
   pim_delete_tracked_nexthop (&nht_p, up, NULL);
 
@@ -707,10 +707,12 @@ pim_upstream_new (struct prefix_sg *sg,
     return NULL;
   }
 
-  pim_ifp = up->rpf.source_nexthop.interface->info;
-  if (pim_ifp)
-    up->channel_oil = pim_channel_oil_add(&up->sg, pim_ifp->mroute_vif_index);
-
+  if (up->rpf.source_nexthop.interface)
+    {
+      pim_ifp = up->rpf.source_nexthop.interface->info;
+      if (pim_ifp)
+        up->channel_oil = pim_channel_oil_add(&up->sg, pim_ifp->mroute_vif_index);
+    }
   listnode_add_sort(pim_upstream_list, up);
 
   if (PIM_DEBUG_TRACE)
@@ -781,10 +783,14 @@ struct pim_upstream *pim_upstream_add(struct prefix_sg *sg,
   if (PIM_DEBUG_TRACE)
     {
       if (up)
-       zlog_debug("%s(%s): %s, found: %d: ref_count: %d",
+        {
+          char buf[PREFIX2STR_BUFFER];
+          prefix2str (&up->rpf.rpf_addr, buf, sizeof (buf));
+         zlog_debug("%s(%s): %s, iif %s found: %d: ref_count: %d",
                   __PRETTY_FUNCTION__, name,
-                  up->sg_str, found,
+                  up->sg_str, buf, found,
                   up->ref_count);
+        }
       else
        zlog_debug("%s(%s): (%s) failure to create",
                   __PRETTY_FUNCTION__, name,
index 4e18c478d6a184c9f5efbe324ccdc935a8285aae..80e7d776423a51135666697d52ae4dc80f103510 100644 (file)
@@ -372,6 +372,12 @@ static void scan_upstream_rpf_cache()
   for (ALL_LIST_ELEMENTS(pim_upstream_list, up_node, up_nextnode, up)) {
     enum pim_rpf_result rpf_result;
     struct pim_rpf      old;
+    struct prefix nht_p;
+
+    nht_p.family = AF_INET;
+    nht_p.prefixlen = IPV4_MAX_BITLEN;
+    nht_p.u.prefix4.s_addr = up->upstream_addr.s_addr;
+    pim_resolve_upstream_nh (&nht_p);
 
     old.source_nexthop.interface = up->rpf.source_nexthop.interface;
     old.source_nexthop.nbr       = up->rpf.source_nexthop.nbr;
@@ -574,7 +580,8 @@ static int on_rpf_cache_refresh(struct thread *t)
   qpim_rpf_cache_refresh_last = pim_time_monotonic_sec();
   ++qpim_rpf_cache_refresh_events;
 
-  pim_rp_setup ();
+  //It is called as part of pim_neighbor_add
+  //pim_rp_setup ();
   return 0;
 }
 
@@ -836,6 +843,7 @@ void igmp_source_forward_start(struct igmp_source *source)
   struct igmp_group *group;
   struct prefix_sg sg;
   int result;
+  int input_iface_vif_index = 0;
 
   memset (&sg, 0, sizeof (struct prefix_sg));
   sg.src = source->source_addr;
@@ -861,11 +869,61 @@ void igmp_source_forward_start(struct igmp_source *source)
   if (!source->source_channel_oil) {
     struct in_addr vif_source;
     struct pim_interface *pim_oif;
+    struct prefix nht_p, src, grp;
+    int ret = 0;
+    struct pim_nexthop_cache out_pnc;
+    struct pim_nexthop nexthop;
 
     if (!pim_rp_set_upstream_addr (&vif_source, source->source_addr, sg.grp))
       return;
 
-    int input_iface_vif_index = fib_lookup_if_vif_index(vif_source);
+    /* Register addr with Zebra NHT */
+    nht_p.family = AF_INET;
+    nht_p.prefixlen = IPV4_MAX_BITLEN;
+    nht_p.u.prefix4 = vif_source;
+    memset (&out_pnc, 0, sizeof (struct pim_nexthop_cache));
+
+    if ((ret = pim_find_or_track_nexthop (&nht_p, NULL, NULL, &out_pnc)) == 1)
+      {
+        if (out_pnc.nexthop_num)
+          {
+            src.family = AF_INET;
+            src.prefixlen = IPV4_MAX_BITLEN;
+            src.u.prefix4 = vif_source;   //RP or Src address
+            grp.family = AF_INET;
+            grp.prefixlen = IPV4_MAX_BITLEN;
+            grp.u.prefix4 = sg.grp;
+            memset (&nexthop, 0, sizeof (nexthop));
+            //Compute PIM RPF using Cached nexthop
+            pim_ecmp_nexthop_search (&out_pnc, &nexthop,
+                                  &src, &grp, 0);
+            if (nexthop.interface)
+              input_iface_vif_index = pim_if_find_vifindex_by_ifindex (nexthop.interface->ifindex);
+          }
+        else
+          {
+            if (PIM_DEBUG_ZEBRA)
+              {
+                char buf1[INET_ADDRSTRLEN];
+                char buf2[INET_ADDRSTRLEN];
+                pim_inet4_dump("<source?>", nht_p.u.prefix4, buf1, sizeof(buf1));
+                pim_inet4_dump("<source?>", grp.u.prefix4, buf2, sizeof(buf2));
+                zlog_debug ("%s: NHT Nexthop not found for addr %s grp %s" ,
+                          __PRETTY_FUNCTION__, buf1, buf2);
+              }
+          }
+      }
+    else
+      input_iface_vif_index = fib_lookup_if_vif_index(vif_source);
+
+    if (PIM_DEBUG_ZEBRA)
+      {
+        char buf2[INET_ADDRSTRLEN];
+        pim_inet4_dump("<source?>", vif_source, buf2, sizeof(buf2));
+        zlog_debug ("%s: NHT %s vif_source %s vif_index:%d ", __PRETTY_FUNCTION__,
+            pim_str_sg_dump (&sg), buf2, input_iface_vif_index);
+      }
+
     if (input_iface_vif_index < 1) {
       if (PIM_DEBUG_IGMP_TRACE)
        {
@@ -1013,49 +1071,105 @@ void pim_forward_start(struct pim_ifchannel *ch)
 {
   struct pim_upstream *up = ch->upstream;
   uint32_t mask = PIM_OIF_FLAG_PROTO_PIM;
+  int input_iface_vif_index =  0;
 
   if (PIM_DEBUG_PIM_TRACE) {
     char source_str[INET_ADDRSTRLEN];
-    char group_str[INET_ADDRSTRLEN]; 
+    char group_str[INET_ADDRSTRLEN];
     char upstream_str[INET_ADDRSTRLEN];
 
     pim_inet4_dump("<source?>", ch->sg.src, source_str, sizeof(source_str));
     pim_inet4_dump("<group?>", ch->sg.grp, group_str, sizeof(group_str));
     pim_inet4_dump("<upstream?>", up->upstream_addr, upstream_str, sizeof(upstream_str));
-    zlog_debug("%s: (S,G)=(%s,%s) oif=%s(%s)",
+    zlog_debug("%s: (S,G)=(%s,%s) oif=%s (%s)",
               __PRETTY_FUNCTION__,
               source_str, group_str, ch->interface->name, upstream_str);
   }
 
-  if (!up->channel_oil) {
-    int input_iface_vif_index = fib_lookup_if_vif_index(up->upstream_addr);
-    if (input_iface_vif_index < 1) {
-      if (PIM_DEBUG_PIM_TRACE)
-       {
-         char source_str[INET_ADDRSTRLEN];
-         pim_inet4_dump("<source?>", up->sg.src, source_str, sizeof(source_str));
-         zlog_debug("%s %s: could not find input interface for source %s",
-                    __FILE__, __PRETTY_FUNCTION__,
-                    source_str);
-       }
-      return;
-    }
+  /* Resolve IIF for upstream as mroute_del sets mfcc_parent to MAXVIFS,
+     as part of mroute_del called by pim_forward_stop.
+  */
+  if (!up->channel_oil ||
+      (up->channel_oil && up->channel_oil->oil.mfcc_parent >= MAXVIFS))
+    {
+      struct prefix nht_p, src, grp;
+      int ret = 0;
+      struct pim_nexthop_cache out_pnc;
+      struct pim_nexthop nexthop;
+
+      /* Register addr with Zebra NHT */
+      nht_p.family = AF_INET;
+      nht_p.prefixlen = IPV4_MAX_BITLEN;
+      nht_p.u.prefix4.s_addr = up->upstream_addr.s_addr;
+      grp.family = AF_INET;
+      grp.prefixlen = IPV4_MAX_BITLEN;
+      grp.u.prefix4 = up->sg.grp;
+      memset (&out_pnc, 0, sizeof (struct pim_nexthop_cache));
+
+      if ((ret =
+                  pim_find_or_track_nexthop (&nht_p, NULL, NULL, &out_pnc)) == 1)
+        {
+          if (out_pnc.nexthop_num)
+            {
+              src.family = AF_INET;
+              src.prefixlen = IPV4_MAX_BITLEN;
+              src.u.prefix4 = up->upstream_addr; //RP or Src address
+              grp.family = AF_INET;
+              grp.prefixlen = IPV4_MAX_BITLEN;
+              grp.u.prefix4 = up->sg.grp;
+              memset (&nexthop, 0, sizeof (nexthop));
+              //Compute PIM RPF using Cached nexthop
+              pim_ecmp_nexthop_search (&out_pnc, &nexthop, &src, &grp, 0);
+              input_iface_vif_index =
+                  pim_if_find_vifindex_by_ifindex (nexthop.interface->ifindex);
+            }
+          else
+            {
+              if (PIM_DEBUG_ZEBRA)
+                {
+                  char buf1[INET_ADDRSTRLEN];
+                  char buf2[INET_ADDRSTRLEN];
+                  pim_inet4_dump("<source?>", nht_p.u.prefix4, buf1, sizeof(buf1));
+                  pim_inet4_dump("<source?>", grp.u.prefix4, buf2, sizeof(buf2));
+                  zlog_debug ("%s: NHT pnc is NULL for addr %s grp %s" ,
+                          __PRETTY_FUNCTION__, buf1, buf2);
+                }
+            }
+        }
+      else
+          input_iface_vif_index = fib_lookup_if_vif_index (up->upstream_addr);
 
-    up->channel_oil = pim_channel_oil_add(&up->sg,
-                                         input_iface_vif_index);
-    if (!up->channel_oil) {
-      if (PIM_DEBUG_PIM_TRACE)
-       zlog_debug("%s %s: could not create OIL for channel (S,G)=%s",
-                  __FILE__, __PRETTY_FUNCTION__,
-                  up->sg_str);
-      return;
+      if (input_iface_vif_index < 1)
+        {
+          if (PIM_DEBUG_PIM_TRACE)
+            {
+              char source_str[INET_ADDRSTRLEN];
+              pim_inet4_dump("<source?>", up->sg.src, source_str, sizeof(source_str));
+              zlog_debug("%s %s: could not find input interface for source %s",
+                      __FILE__, __PRETTY_FUNCTION__,
+                      source_str);
+            }
+          return;
+        }
+      if (PIM_DEBUG_TRACE)
+        {
+          zlog_debug ("%s: NHT entry %s update channel_oil vif_index %d ",
+                      __PRETTY_FUNCTION__, up->sg_str, input_iface_vif_index);
+        }
+      up->channel_oil = pim_channel_oil_add (&up->sg, input_iface_vif_index);
+      if (!up->channel_oil)
+        {
+          if (PIM_DEBUG_PIM_TRACE)
+            zlog_debug ("%s %s: could not create OIL for channel (S,G)=%s",
+                        __FILE__, __PRETTY_FUNCTION__, up->sg_str);
+          return;
+        }
     }
-  }
 
   if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_IGMP)
     mask = PIM_OIF_FLAG_PROTO_IGMP;
 
-  pim_channel_add_oif(up->channel_oil, ch->interface, mask);
+  pim_channel_add_oif (up->channel_oil, ch->interface, mask);
 }
 
 void pim_forward_stop(struct pim_ifchannel *ch)
index eaef4ff5c0331bbe0a7fc78be7403c8a4ec30220..3f11d9032c69b45740639f585146e5413758aa08 100644 (file)
@@ -75,7 +75,9 @@ unsigned int              qpim_keep_alive_time = PIM_KEEPALIVE_PERIOD;
 signed int                qpim_rp_keep_alive_time = 0;
 int64_t                   qpim_nexthop_lookups = 0;
 int                       qpim_packet_process = PIM_DEFAULT_PACKET_PROCESS;
-struct pim_instance          *pimg = NULL;
+uint8_t                   qpim_ecmp_enable = 0; /* PIM ECMP knob; starts at 0 (ECMP is disabled by default) */
+uint8_t                   qpim_ecmp_rebalance_enable = 0; /* PIM ECMP rebalance knob; starts at 0 */
+struct pim_instance       *pimg = NULL;
 
 int32_t qpim_register_suppress_time = PIM_REGISTER_SUPPRESSION_TIME_DEFAULT;
 int32_t qpim_register_probe_time = PIM_REGISTER_PROBE_TIME_DEFAULT;
index 6c3dcfafcabea50fce4035cd8d2dbec66ed04f7d..2e8d0d11bb744d564e2a11f41d93f3506e9f2a28 100644 (file)
@@ -155,6 +155,9 @@ struct list              *qpim_static_route_list; /* list of routes added static
 extern unsigned int       qpim_keep_alive_time;
 extern signed int         qpim_rp_keep_alive_time;
 extern int                qpim_packet_process;
+extern uint8_t            qpim_ecmp_enable; /* PIM ECMP knob */
+extern uint8_t            qpim_ecmp_rebalance_enable; /* PIM ECMP rebalance knob */
+
 #define PIM_DEFAULT_PACKET_PROCESS 3
 
 #define PIM_JP_HOLDTIME (qpim_t_periodic * 7 / 2)