#include <linux/slab.h>
#include <linux/udp.h>
#include <linux/igmp.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/ethtool.h>
#include <net/arp.h>
ip_tunnel_collect_metadata();
}
+/* Find the fan map entry whose overlay subnet contains @daddr.
+ *
+ * Takes and drops the RCU read lock internally, yet returns the entry
+ * *after* rcu_read_unlock() -- NOTE(review): the returned pointer is only
+ * guaranteed valid if the caller itself holds rcu_read_lock() or the
+ * write-side lock; confirm against each call site.
+ *
+ * Returns the matching ip_fan_map, or NULL when no overlay covers @daddr.
+ */
+static struct ip_fan_map *vxlan_fan_find_map(struct vxlan_dev *vxlan, __be32 daddr)
+{
+	struct ip_fan_map *fan_map;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
+		/* Match when @daddr masked to the overlay prefix equals the
+		 * stored overlay network address.
+		 */
+		if (fan_map->overlay ==
+		    (daddr & inet_make_mask(fan_map->overlay_prefix))) {
+			rcu_read_unlock();
+			return fan_map;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+/* Remove every fan mapping from @vxlan.
+ *
+ * This is the update side: the caller is expected to hold the write-side
+ * lock (rtnl).  Entries are unlinked with list_del_rcu() and freed only
+ * after a grace period so concurrent RCU readers never touch freed memory.
+ */
+static void vxlan_fan_flush_map(struct vxlan_dev *vxlan)
+{
+	struct ip_fan_map *fan_map, *tmp;
+
+	/* Use the _safe iterator: list_for_each_entry_rcu() is the
+	 * read-side primitive and would trigger RCU-lockdep splats here,
+	 * where rcu_read_lock() is not held and entries are being deleted.
+	 */
+	list_for_each_entry_safe(fan_map, tmp, &vxlan->fan.fan_maps, list) {
+		list_del_rcu(&fan_map->list);
+		kfree_rcu(fan_map, rcu);
+	}
+}
+
+/* Delete the fan mapping covering @overlay, if one exists.
+ *
+ * Returns 0 on success, or -ENOENT when no mapping matches @overlay.
+ */
+static int vxlan_fan_del_map(struct vxlan_dev *vxlan, __be32 overlay)
+{
+	struct ip_fan_map *fan_map = vxlan_fan_find_map(vxlan, overlay);
+
+	if (!fan_map)
+		return -ENOENT;
+
+	/* Unlink immediately; the free is deferred past a grace period. */
+	list_del_rcu(&fan_map->list);
+	kfree_rcu(fan_map, rcu);
+
+	return 0;
+}
+
+/* Install a fan overlay->underlay mapping on @vxlan.
+ *
+ * Two special encodings are accepted from userspace:
+ *   - overlay == 0 && underlay == 0: flush all mappings;
+ *   - underlay == 0 (overlay set):  delete the mapping for @overlay.
+ *
+ * Returns 0 on success, -EINVAL for malformed prefixes, -EEXIST when a
+ * mapping for the overlay already exists, -ENOENT when deleting a
+ * non-existent mapping, or -ENOMEM on allocation failure.
+ */
+static int vxlan_fan_add_map(struct vxlan_dev *vxlan, struct ifla_fan_map *map)
+{
+	__be32 overlay_mask, underlay_mask;
+	struct ip_fan_map *fan_map;
+
+	overlay_mask = inet_make_mask(map->overlay_prefix);
+	underlay_mask = inet_make_mask(map->underlay_prefix);
+
+	netdev_dbg(vxlan->dev, "vfam: map: o %x/%d u %x/%d om %x um %x\n",
+		   map->overlay, map->overlay_prefix,
+		   map->underlay, map->underlay_prefix,
+		   overlay_mask, underlay_mask);
+
+	/* Reject addresses with host bits set outside their prefix. */
+	if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
+		return -EINVAL;
+
+	/* An empty overlay network may not map to a real underlay. */
+	if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
+		return -EINVAL;
+
+	/* Special case: overlay 0 and underlay 0: flush all mappings */
+	if (!map->overlay && !map->underlay) {
+		vxlan_fan_flush_map(vxlan);
+		return 0;
+	}
+
+	/* Special case: overlay set and underlay 0: clear map for overlay */
+	if (!map->underlay)
+		return vxlan_fan_del_map(vxlan, map->overlay);
+
+	if (vxlan_fan_find_map(vxlan, map->overlay))
+		return -EEXIST;
+
+	fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
+	if (!fan_map)
+		return -ENOMEM;
+
+	fan_map->underlay = map->underlay;
+	fan_map->overlay = map->overlay;
+	fan_map->underlay_prefix = map->underlay_prefix;
+	fan_map->overlay_mask = ntohl(overlay_mask);
+	fan_map->overlay_prefix = map->overlay_prefix;
+
+	list_add_tail_rcu(&fan_map->list, &vxlan->fan.fan_maps);
+
+	return 0;
+}
+
+/* Walk the IFLA_IPTUN_FAN_MAP nested attribute and install each
+ * ifla_fan_map entry on @vxlan.
+ *
+ * Returns 0 on success, or the first error from vxlan_fan_add_map().
+ */
+static int vxlan_parse_fan_map(struct nlattr *data[], struct vxlan_dev *vxlan)
+{
+	struct nlattr *attr;
+	int rem;
+
+	nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
+		int err = vxlan_fan_add_map(vxlan, nla_data(attr));
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Build a synthetic remote destination for a fan-mapped packet.
+ *
+ * Extracts the overlay destination IPv4 address from @skb (from the IP
+ * header, or from the target-IP field of an ARP request), looks up the
+ * fan mapping that covers it, and fills @fan_rdst with the translated
+ * underlay address and the device's default VNI.
+ *
+ * Returns 0 on success, -EINVAL if the packet is neither IP nor ARP,
+ * has no matching fan map, or the map's underlay is zero.
+ */
+static int vxlan_fan_build_rdst(struct vxlan_dev *vxlan, struct sk_buff *skb,
+				struct vxlan_rdst *fan_rdst)
+{
+	struct ip_fan_map *f_map;
+	union vxlan_addr *va;
+	u32 daddr, underlay;
+	struct arphdr *arp;
+	void *arp_ptr;
+	struct ethhdr *eth;
+	struct iphdr *iph;
+
+	eth = eth_hdr(skb);
+	switch (eth->h_proto) {
+	case htons(ETH_P_IP):
+		iph = ip_hdr(skb);
+		if (!iph)
+			return -EINVAL;
+		daddr = iph->daddr;
+		break;
+	case htons(ETH_P_ARP):
+		arp = arp_hdr(skb);
+		if (!arp)
+			return -EINVAL;
+		/* ARP payload layout: sha | sip | tha | tip, with hardware
+		 * addresses of dev->addr_len bytes and 4-byte IPv4 fields.
+		 */
+		arp_ptr = arp + 1;
+		netdev_dbg(vxlan->dev,
+			   "vfbr: arp sha %pM sip %pI4 tha %pM tip %pI4\n",
+			   arp_ptr, arp_ptr + skb->dev->addr_len,
+			   arp_ptr + skb->dev->addr_len + 4,
+			   arp_ptr + (skb->dev->addr_len * 2) + 4);
+		/* Skip sha/sip/tha to reach the target IP (tip). */
+		arp_ptr += (skb->dev->addr_len * 2) + 4;
+		memcpy(&daddr, arp_ptr, 4);
+		break;
+	default:
+		netdev_dbg(vxlan->dev, "vfbr: unknown eth p %x\n", eth->h_proto);
+		return -EINVAL;
+	}
+
+	f_map = vxlan_fan_find_map(vxlan, daddr);
+	if (!f_map)
+		return -EINVAL;
+
+	daddr = ntohl(daddr);
+	underlay = ntohl(f_map->underlay);
+	if (!underlay)
+		return -EINVAL;
+
+	memset(fan_rdst, 0, sizeof(*fan_rdst));
+	va = &fan_rdst->remote_ip;
+	va->sa.sa_family = AF_INET;
+	fan_rdst->remote_vni = vxlan->default_dst.remote_vni;
+	/* Graft the overlay host bits onto the underlay network: shift the
+	 * sub-overlay bits into the underlay's host portion.  NOTE(review):
+	 * assumes overlay_prefix + (32 - underlay_prefix) <= 32 so the
+	 * shift count stays non-negative -- confirm netlink validation
+	 * enforces this.
+	 */
+	va->sin.sin_addr.s_addr = htonl(underlay |
+					((daddr & ~f_map->overlay_mask) >>
+					 (32 - f_map->overlay_prefix -
+					  (32 - f_map->underlay_prefix))));
+	netdev_dbg(vxlan->dev, "vfbr: daddr %x ul %x dst %x\n",
+		   daddr, underlay, va->sin.sin_addr.s_addr);
+
+	return 0;
+}
+
#if IS_ENABLED(CONFIG_IPV6)
static inline
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
flush = 0;
out:
- skb_gro_remcsum_cleanup(skb, &grc);
- skb->remcsum_offload = 0;
- NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
return pp;
}
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
-/* Add new entry to forwarding table -- assumes lock held */
+/* Allocate and initialize a forwarding-table entry (no remotes yet).
+ *
+ * Uses GFP_ATOMIC since callers may hold the hash spinlock.  Returns the
+ * new entry, or NULL on allocation failure.
+ */
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
+					 const u8 *mac, __u16 state,
+					 __be32 src_vni, __u8 ndm_flags)
+{
+	struct vxlan_fdb *f = kmalloc(sizeof(*f), GFP_ATOMIC);
+
+	if (!f)
+		return NULL;
+
+	memcpy(f->eth_addr, mac, ETH_ALEN);
+	f->state = state;
+	f->flags = ndm_flags;
+	f->vni = src_vni;
+	f->used = jiffies;
+	f->updated = f->used;
+	INIT_LIST_HEAD(&f->remotes);
+
+	return f;
+}
+
+/* Create a new fdb entry with a single remote and link it into the hash
+ * table.  Assumes the hash lock is held.  Does NOT send a netlink
+ * notification -- that is the caller's responsibility (allows callers to
+ * create the entry before the device is registered).
+ *
+ * On success, stores the new entry in *@fdb and returns 0; returns
+ * -ENOSPC when the configured address limit is reached, -ENOMEM on
+ * allocation failure, or the error from vxlan_fdb_append().
+ */
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
+			    const u8 *mac, union vxlan_addr *ip,
+			    __u16 state, __be16 port, __be32 src_vni,
+			    __be32 vni, __u32 ifindex, __u8 ndm_flags,
+			    struct vxlan_fdb **fdb)
+{
+	struct vxlan_rdst *rd = NULL;
+	struct vxlan_fdb *f;
+	int rc;
+
+	/* Enforce the per-device learned-address cap, if configured. */
+	if (vxlan->cfg.addrmax &&
+	    vxlan->addrcnt >= vxlan->cfg.addrmax)
+		return -ENOSPC;
+
+	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+	f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+	if (!f)
+		return -ENOMEM;
+
+	rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+	if (rc < 0) {
+		kfree(f);
+		return rc;
+	}
+
+	++vxlan->addrcnt;
+	hlist_add_head_rcu(&f->hlist,
+			   vxlan_fdb_head(vxlan, mac, src_vni));
+
+	*fdb = f;
+
+	return 0;
+}
+
+/* Add new entry to forwarding table -- assumes lock held */
+static int vxlan_fdb_update(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags,
__be16 port, __be32 src_vni, __be32 vni,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- if (vxlan->cfg.addrmax &&
- vxlan->addrcnt >= vxlan->cfg.addrmax)
- return -ENOSPC;
-
/* Disallow replace to add a multicast entry */
if ((flags & NLM_F_REPLACE) &&
(is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
return -EOPNOTSUPP;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
- f = kmalloc(sizeof(*f), GFP_ATOMIC);
- if (!f)
- return -ENOMEM;
-
- notify = 1;
- f->state = state;
- f->flags = ndm_flags;
- f->updated = f->used = jiffies;
- f->vni = src_vni;
- INIT_LIST_HEAD(&f->remotes);
- memcpy(f->eth_addr, mac, ETH_ALEN);
-
- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
- if (rc < 0) {
- kfree(f);
+ rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
+ vni, ifindex, ndm_flags, &f);
+ if (rc < 0)
return rc;
- }
-
- ++vxlan->addrcnt;
- hlist_add_head_rcu(&f->hlist,
- vxlan_fdb_head(vxlan, mac, src_vni));
+ notify = 1;
}
if (notify) {
kfree(f);
}
-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+ bool do_notify)
{
netdev_dbg(vxlan->dev,
"delete %pM\n", f->eth_addr);
--vxlan->addrcnt;
- vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
+ if (do_notify)
+ vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
hlist_del_rcu(&f->hlist);
call_rcu(&f->rcu, vxlan_fdb_free);
return -EAFNOSUPPORT;
spin_lock_bh(&vxlan->hash_lock);
- err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
+ err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
port, src_vni, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock);
goto out;
}
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
out:
return 0;
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
- vxlan_fdb_create(vxlan, src_mac, src_ip,
+ vxlan_fdb_update(vxlan, src_mac, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
vxlan->cfg.dst_port,
goto drop;
}
+ rcu_read_lock();
+
+ if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
+ rcu_read_unlock();
+ atomic_long_inc(&vxlan->dev->rx_dropped);
+ goto drop;
+ }
+
stats = this_cpu_ptr(vxlan->dev->tstats);
u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
u64_stats_update_end(&stats->syncp);
gro_cells_receive(&vxlan->gro_cells, skb);
+
+ rcu_read_unlock();
+
return 0;
drop:
struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
- struct net_device *dev = skb->dev;
+ struct net_device *dev;
int len = skb->len;
tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
#endif
}
+ rcu_read_lock();
+ dev = skb->dev;
+ if (unlikely(!(dev->flags & IFF_UP))) {
+ kfree_skb(skb);
+ goto drop;
+ }
+
if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
- vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0,
- vni);
+ vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++;
rx_stats->rx_bytes += len;
u64_stats_update_end(&rx_stats->syncp);
} else {
+drop:
dev->stats.rx_dropped++;
}
+ rcu_read_unlock();
}
static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
vni = tunnel_id_to_key32(info->key.tun_id);
ifindex = 0;
dst_cache = &info->dst_cache;
- if (info->options_len)
+ if (info->options_len) {
+ if (info->options_len < sizeof(*md))
+ goto drop;
md = ip_tunnel_info_opts(info);
+ }
ttl = info->key.ttl;
tos = info->key.tos;
label = info->key.label;
goto tx_error;
}
+ if (fan_has_map(&vxlan->fan) && rt->rt_flags & RTCF_LOCAL) {
+ netdev_dbg(dev, "discard fan to localhost %pI4\n",
+ &dst->sin.sin_addr.s_addr);
+ ip_rt_put(rt);
+ goto tx_free;
+ }
+
/* Bypass encapsulation if the destination is local */
if (!info) {
err = encap_bypass_if_local(skb, dev, vxlan, dst,
if (skb_dst(skb)) {
int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
- skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
}
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
if (skb_dst(skb)) {
int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
- skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
}
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
dev->stats.tx_carrier_errors++;
dst_release(ndst);
dev->stats.tx_errors++;
+tx_free:
kfree_skb(skb);
}
#endif
}
+ if (fan_has_map(&vxlan->fan)) {
+ struct vxlan_rdst fan_rdst;
+
+ netdev_dbg(vxlan->dev, "vxlan_xmit p %x d %pM\n",
+ eth->h_proto, eth->h_dest);
+ if (vxlan_fan_build_rdst(vxlan, skb, &fan_rdst)) {
+ dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ vxlan_xmit_one(skb, dev, vni, &fan_rdst, 0);
+ return NETDEV_TX_OK;
+ }
+
eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false;
"garbage collect %pM\n",
f->eth_addr);
f->state = NUD_STALE;
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
} else if (time_before(timeout, next_timer))
next_timer = timeout;
}
spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f)
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
spin_unlock_bh(&vxlan->hash_lock);
}
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ gro_cells_destroy(&vxlan->gro_cells);
+
vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
free_percpu(dev->tstats);
continue;
/* the all_zeros_mac entry is deleted at vxlan_uninit */
if (!is_zero_ether_addr(f->eth_addr))
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
}
}
spin_unlock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
+
+ INIT_LIST_HEAD(&vxlan->fan.fan_maps);
}
static void vxlan_ether_setup(struct net_device *dev)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb *f = NULL;
int err;
err = vxlan_dev_configure(net, dev, conf, false, extack);
err = vxlan_fdb_create(vxlan, all_zeros_mac,
&vxlan->default_dst.remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
- NLM_F_EXCL | NLM_F_CREATE,
vxlan->cfg.dst_port,
vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_ifindex,
- NTF_SELF);
+ NTF_SELF, &f);
if (err)
return err;
}
err = register_netdevice(dev);
+ if (err)
+ goto errout;
+
+ err = rtnl_configure_link(dev, NULL);
if (err) {
- vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
- return err;
+ unregister_netdevice(dev);
+ goto errout;
}
+ /* notify default fdb entry */
+ if (f)
+ vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
+
list_add(&vxlan->next, &vn->vxlan_list);
return 0;
+errout:
+ if (f)
+ vxlan_fdb_destroy(vxlan, f, false);
+ return err;
}
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
bool changelink)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err;
memset(conf, 0, sizeof(*conf));
conf->remote_ip.sa.sa_family = AF_INET6;
}
+ if (data[IFLA_VXLAN_FAN_MAP]) {
+ err = vxlan_parse_fan_map(data, vxlan);
+ if (err)
+ return err;
+ }
+
if (data[IFLA_VXLAN_LOCAL]) {
if (changelink && (conf->saddr.sa.sa_family != AF_INET))
return -EOPNOTSUPP;
struct vxlan_rdst *dst = &vxlan->default_dst;
struct vxlan_rdst old_dst;
struct vxlan_config conf;
+ struct vxlan_fdb *f = NULL;
int err;
err = vxlan_nl2conf(tb, data,
err = vxlan_fdb_create(vxlan, all_zeros_mac,
&dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
- NLM_F_CREATE | NLM_F_APPEND,
vxlan->cfg.dst_port,
dst->remote_vni,
dst->remote_vni,
dst->remote_ifindex,
- NTF_SELF);
+ NTF_SELF, &f);
if (err) {
spin_unlock_bh(&vxlan->hash_lock);
return err;
}
+ vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
}
spin_unlock_bh(&vxlan->hash_lock);
}
vxlan_flush(vxlan, true);
- gro_cells_destroy(&vxlan->gro_cells);
list_del(&vxlan->next);
unregister_netdevice_queue(dev, head);
}
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
+ nla_total_size(sizeof(struct ip_fan_map) * 256) +
0;
}
}
}
+ if (fan_has_map(&vxlan->fan)) {
+ struct nlattr *fan_nest;
+ struct ip_fan_map *fan_map;
+
+ fan_nest = nla_nest_start(skb, IFLA_VXLAN_FAN_MAP);
+ if (!fan_nest)
+ goto nla_put_failure;
+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
+ struct ifla_fan_map map;
+
+ map.underlay = fan_map->underlay;
+ map.underlay_prefix = fan_map->underlay_prefix;
+ map.overlay = fan_map->overlay;
+ map.overlay_prefix = fan_map->overlay_prefix;
+ if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fan_nest);
+ }
+
if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
return 0;
}
+#ifdef CONFIG_SYSCTL
+/* Handle returned by register_net_sysctl(); needed for unregistration. */
+static struct ctl_table_header *vxlan_fan_header;
+/* Fan protocol version advertised to userspace (read-only). */
+static unsigned int vxlan_fan_version = 4;
+
+/* Exposes /proc/sys/net/fan/vxlan so tooling can detect fan support. */
+static struct ctl_table vxlan_fan_sysctls[] = {
+	{
+		.procname	= "vxlan",
+		.data		= &vxlan_fan_version,
+		.maxlen		= sizeof(vxlan_fan_version),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{},
+};
+#endif /* CONFIG_SYSCTL */
+
static void __net_exit vxlan_exit_net(struct net *net)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
/* If vxlan->dev is in the same netns, it has already been added
* to the list by the previous loop.
*/
- if (!net_eq(dev_net(vxlan->dev), net)) {
- gro_cells_destroy(&vxlan->gro_cells);
+ if (!net_eq(dev_net(vxlan->dev), net))
unregister_netdevice_queue(vxlan->dev, &list);
- }
}
unregister_netdevice_many(&list);
if (rc)
goto out3;
+#ifdef CONFIG_SYSCTL
+ vxlan_fan_header = register_net_sysctl(&init_net, "net/fan",
+ vxlan_fan_sysctls);
+ if (!vxlan_fan_header) {
+ rc = -ENOMEM;
+ goto sysctl_failed;
+ }
+#endif /* CONFIG_SYSCTL */
+
return 0;
+#ifdef CONFIG_SYSCTL
+sysctl_failed:
+ rtnl_link_unregister(&vxlan_link_ops);
+#endif /* CONFIG_SYSCTL */
out3:
unregister_netdevice_notifier(&vxlan_notifier_block);
out2:
static void __exit vxlan_cleanup_module(void)
{
+#ifdef CONFIG_SYSCTL
+ unregister_net_sysctl_table(vxlan_fan_header);
+#endif /* CONFIG_SYSCTL */
rtnl_link_unregister(&vxlan_link_ops);
unregister_netdevice_notifier(&vxlan_notifier_block);
unregister_pernet_subsys(&vxlan_net_ops);