git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blobdiff - net/ipv4/route.c
net: Replace vrf_master_ifindex{, _rcu} with l3mdev equivalents
[mirror_ubuntu-artful-kernel.git] / net / ipv4 / route.c
index e681b852ced1d0c0cde984496d832d9cf3f7fad2..ba466667c45c366f45bc72e417b147c8b7096c4d 100644 (file)
@@ -91,6 +91,7 @@
 #include <linux/slab.h>
 #include <linux/jhash.h>
 #include <net/dst.h>
+#include <net/dst_metadata.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <net/icmp.h>
 #include <net/xfrm.h>
+#include <net/lwtunnel.h>
 #include <net/netevent.h>
 #include <net/rtnetlink.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/kmemleak.h>
 #endif
 #include <net/secure_seq.h>
+#include <net/ip_tunnels.h>
+#include <net/vrf.h>
+#include <net/l3mdev.h>
 
 #define RT_FL_TOS(oldflp4) \
        ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -834,6 +839,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
        struct inet_peer *peer;
        struct net *net;
        int log_martians;
+       int vif;
 
        rcu_read_lock();
        in_dev = __in_dev_get_rcu(rt->dst.dev);
@@ -842,10 +848,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
                return;
        }
        log_martians = IN_DEV_LOG_MARTIANS(in_dev);
+       vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
        rcu_read_unlock();
 
        net = dev_net(rt->dst.dev);
-       peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
+       peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
        if (!peer) {
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
                          rt_nexthop(rt, ip_hdr(skb)->daddr));
@@ -934,7 +941,8 @@ static int ip_error(struct sk_buff *skb)
                break;
        }
 
-       peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
+       peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
+                              l3mdev_master_ifindex(skb->dev), 1);
 
        send = true;
        if (peer) {
@@ -1403,6 +1411,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 #ifdef CONFIG_IP_ROUTE_CLASSID
                rt->dst.tclassid = nh->nh_tclassid;
 #endif
+               rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
                if (unlikely(fnhe))
                        cached = rt_bind_exception(rt, fnhe, daddr);
                else if (!(rt->dst.flags & DST_NOCACHE))
@@ -1430,12 +1439,34 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 }
 
 static struct rtable *rt_dst_alloc(struct net_device *dev,
+                                  unsigned int flags, u16 type,
                                   bool nopolicy, bool noxfrm, bool will_cache)
 {
-       return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
-                        (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
-                        (nopolicy ? DST_NOPOLICY : 0) |
-                        (noxfrm ? DST_NOXFRM : 0));
+       struct rtable *rt;
+
+       rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+                      (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
+                      (nopolicy ? DST_NOPOLICY : 0) |
+                      (noxfrm ? DST_NOXFRM : 0));
+
+       if (rt) {
+               rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+               rt->rt_flags = flags;
+               rt->rt_type = type;
+               rt->rt_is_input = 0;
+               rt->rt_iif = 0;
+               rt->rt_pmtu = 0;
+               rt->rt_gateway = 0;
+               rt->rt_uses_gateway = 0;
+               rt->rt_table_id = 0;
+               INIT_LIST_HEAD(&rt->rt_uncached);
+
+               rt->dst.output = ip_output;
+               if (flags & RTCF_LOCAL)
+                       rt->dst.input = ip_local_deliver;
+       }
+
+       return rt;
 }
 
 /* called in rcu_read_lock() section */
@@ -1444,6 +1475,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
        struct rtable *rth;
        struct in_device *in_dev = __in_dev_get_rcu(dev);
+       unsigned int flags = RTCF_MULTICAST;
        u32 itag = 0;
        int err;
 
@@ -1456,9 +1488,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
            skb->protocol != htons(ETH_P_IP))
                goto e_inval;
 
-       if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
-               if (ipv4_is_loopback(saddr))
-                       goto e_inval;
+       if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
+               goto e_inval;
 
        if (ipv4_is_zeronet(saddr)) {
                if (!ipv4_is_local_multicast(daddr))
@@ -1469,7 +1500,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                if (err < 0)
                        goto e_err;
        }
-       rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
+       if (our)
+               flags |= RTCF_LOCAL;
+
+       rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
                           IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
        if (!rth)
                goto e_nobufs;
@@ -1478,20 +1512,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        rth->dst.tclassid = itag;
 #endif
        rth->dst.output = ip_rt_bug;
-
-       rth->rt_genid   = rt_genid_ipv4(dev_net(dev));
-       rth->rt_flags   = RTCF_MULTICAST;
-       rth->rt_type    = RTN_MULTICAST;
        rth->rt_is_input= 1;
-       rth->rt_iif     = 0;
-       rth->rt_pmtu    = 0;
-       rth->rt_gateway = 0;
-       rth->rt_uses_gateway = 0;
-       INIT_LIST_HEAD(&rth->rt_uncached);
-       if (our) {
-               rth->dst.input= ip_local_deliver;
-               rth->rt_flags |= RTCF_LOCAL;
-       }
 
 #ifdef CONFIG_IP_MROUTE
        if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -1546,7 +1567,6 @@ static int __mkroute_input(struct sk_buff *skb,
        struct rtable *rth;
        int err;
        struct in_device *out_dev;
-       unsigned int flags = 0;
        bool do_cache;
        u32 itag = 0;
 
@@ -1601,7 +1621,7 @@ static int __mkroute_input(struct sk_buff *skb,
                }
        }
 
-       rth = rt_dst_alloc(out_dev->dev,
+       rth = rt_dst_alloc(out_dev->dev, 0, res->type,
                           IN_DEV_CONF_GET(in_dev, NOPOLICY),
                           IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
        if (!rth) {
@@ -1609,21 +1629,22 @@ static int __mkroute_input(struct sk_buff *skb,
                goto cleanup;
        }
 
-       rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
-       rth->rt_flags = flags;
-       rth->rt_type = res->type;
        rth->rt_is_input = 1;
-       rth->rt_iif     = 0;
-       rth->rt_pmtu    = 0;
-       rth->rt_gateway = 0;
-       rth->rt_uses_gateway = 0;
-       INIT_LIST_HEAD(&rth->rt_uncached);
+       if (res->table)
+               rth->rt_table_id = res->table->tb_id;
        RT_CACHE_STAT_INC(in_slow_tot);
 
        rth->dst.input = ip_forward;
-       rth->dst.output = ip_output;
 
        rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
+       if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
+               rth->dst.lwtstate->orig_output = rth->dst.output;
+               rth->dst.output = lwtunnel_output;
+       }
+       if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
+               rth->dst.lwtstate->orig_input = rth->dst.input;
+               rth->dst.input = lwtunnel_input;
+       }
        skb_dst_set(skb, &rth->dst);
 out:
        err = 0;
@@ -1662,6 +1683,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
        struct fib_result res;
        struct in_device *in_dev = __in_dev_get_rcu(dev);
+       struct ip_tunnel_info *tun_info;
        struct flowi4   fl4;
        unsigned int    flags = 0;
        u32             itag = 0;
@@ -1679,10 +1701,18 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
           by fib_lookup.
         */
 
+       tun_info = skb_tunnel_info(skb);
+       if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
+               fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
+       else
+               fl4.flowi4_tun_key.tun_id = 0;
+       skb_dst_drop(skb);
+
        if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
                goto martian_source;
 
        res.fi = NULL;
+       res.table = NULL;
        if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
                goto brd_input;
 
@@ -1710,7 +1740,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         *      Now we are ready to route packet.
         */
        fl4.flowi4_oif = 0;
-       fl4.flowi4_iif = dev->ifindex;
+       fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
        fl4.flowi4_mark = skb->mark;
        fl4.flowi4_tos = tos;
        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -1730,7 +1760,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                err = fib_validate_source(skb, saddr, daddr, tos,
                                          0, dev, in_dev, &itag);
                if (err < 0)
-                       goto martian_source_keep_err;
+                       goto martian_source;
                goto local_input;
        }
 
@@ -1752,7 +1782,7 @@ brd_input:
                err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
                                          in_dev, &itag);
                if (err < 0)
-                       goto martian_source_keep_err;
+                       goto martian_source;
        }
        flags |= RTCF_BROADCAST;
        res.type = RTN_BROADCAST;
@@ -1772,26 +1802,19 @@ local_input:
                }
        }
 
-       rth = rt_dst_alloc(net->loopback_dev,
+       rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
                           IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
        if (!rth)
                goto e_nobufs;
 
-       rth->dst.input= ip_local_deliver;
        rth->dst.output= ip_rt_bug;
 #ifdef CONFIG_IP_ROUTE_CLASSID
        rth->dst.tclassid = itag;
 #endif
-
-       rth->rt_genid = rt_genid_ipv4(net);
-       rth->rt_flags   = flags|RTCF_LOCAL;
-       rth->rt_type    = res.type;
        rth->rt_is_input = 1;
-       rth->rt_iif     = 0;
-       rth->rt_pmtu    = 0;
-       rth->rt_gateway = 0;
-       rth->rt_uses_gateway = 0;
-       INIT_LIST_HEAD(&rth->rt_uncached);
+       if (res.table)
+               rth->rt_table_id = res.table->tb_id;
+
        RT_CACHE_STAT_INC(in_slow_tot);
        if (res.type == RTN_UNREACHABLE) {
                rth->dst.input= ip_error;
@@ -1812,6 +1835,7 @@ no_route:
        RT_CACHE_STAT_INC(in_no_route);
        res.type = RTN_UNREACHABLE;
        res.fi = NULL;
+       res.table = NULL;
        goto local_input;
 
        /*
@@ -1834,8 +1858,6 @@ e_nobufs:
        goto out;
 
 martian_source:
-       err = -EINVAL;
-martian_source_keep_err:
        ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
        goto out;
 }
@@ -1963,29 +1985,19 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
        }
 
 add:
-       rth = rt_dst_alloc(dev_out,
+       rth = rt_dst_alloc(dev_out, flags, type,
                           IN_DEV_CONF_GET(in_dev, NOPOLICY),
                           IN_DEV_CONF_GET(in_dev, NOXFRM),
                           do_cache);
        if (!rth)
                return ERR_PTR(-ENOBUFS);
 
-       rth->dst.output = ip_output;
-
-       rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
-       rth->rt_flags   = flags;
-       rth->rt_type    = type;
-       rth->rt_is_input = 0;
        rth->rt_iif     = orig_oif ? : 0;
-       rth->rt_pmtu    = 0;
-       rth->rt_gateway = 0;
-       rth->rt_uses_gateway = 0;
-       INIT_LIST_HEAD(&rth->rt_uncached);
+       if (res->table)
+               rth->rt_table_id = res->table->tb_id;
 
        RT_CACHE_STAT_INC(out_slow_tot);
 
-       if (flags & RTCF_LOCAL)
-               rth->dst.input = ip_local_deliver;
        if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
                if (flags & RTCF_LOCAL &&
                    !(dev_out->flags & IFF_LOOPBACK)) {
@@ -2004,6 +2016,8 @@ add:
        }
 
        rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
+       if (lwtunnel_output_redirect(rth->dst.lwtstate))
+               rth->dst.output = lwtunnel_output;
 
        return rth;
 }
@@ -2020,6 +2034,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
        struct fib_result res;
        struct rtable *rth;
        int orig_oif;
+       int err = -ENETUNREACH;
 
        res.tclassid    = 0;
        res.fi          = NULL;
@@ -2110,6 +2125,11 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
                                fl4->saddr = inet_select_addr(dev_out, 0,
                                                              RT_SCOPE_HOST);
                }
+               if (netif_is_l3_master(dev_out) &&
+                   !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
+                       rth = vrf_dev_get_rth(dev_out);
+                       goto out;
+               }
        }
 
        if (!fl4->daddr) {
@@ -2123,7 +2143,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
                goto make_route;
        }
 
-       if (fib_lookup(net, fl4, &res, 0)) {
+       err = fib_lookup(net, fl4, &res, 0);
+       if (err) {
                res.fi = NULL;
                res.table = NULL;
                if (fl4->flowi4_oif) {
@@ -2151,7 +2172,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
                        res.type = RTN_UNICAST;
                        goto make_route;
                }
-               rth = ERR_PTR(-ENETUNREACH);
+               rth = ERR_PTR(err);
                goto out;
        }
 
@@ -2261,7 +2282,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                rt->rt_uses_gateway = ort->rt_uses_gateway;
 
                INIT_LIST_HEAD(&rt->rt_uncached);
-
                dst_free(new);
        }
 
@@ -2271,7 +2291,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 }
 
 struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
-                                   struct sock *sk)
+                                   const struct sock *sk)
 {
        struct rtable *rt = __ip_route_output_key(net, flp4);
 
@@ -2287,7 +2307,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 }
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
-static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
+static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
                        struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
                        u32 seq, int event, int nowait, unsigned int flags)
 {
@@ -2307,8 +2327,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
        r->rtm_dst_len  = 32;
        r->rtm_src_len  = 0;
        r->rtm_tos      = fl4->flowi4_tos;
-       r->rtm_table    = RT_TABLE_MAIN;
-       if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
+       r->rtm_table    = table_id;
+       if (nla_put_u32(skb, RTA_TABLE, table_id))
                goto nla_put_failure;
        r->rtm_type     = rt->rt_type;
        r->rtm_scope    = RT_SCOPE_UNIVERSE;
@@ -2413,6 +2433,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        int err;
        int mark;
        struct sk_buff *skb;
+       u32 table_id = RT_TABLE_MAIN;
 
        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
        if (err < 0)
@@ -2482,7 +2503,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        if (rtm->rtm_flags & RTM_F_NOTIFY)
                rt->rt_flags |= RTCF_NOTIFY;
 
-       err = rt_fill_info(net, dst, src, &fl4, skb,
+       if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
+               table_id = rt->rt_table_id;
+
+       err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
                           NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
                           RTM_NEWROUTE, 0, 0);
        if (err < 0)