git.proxmox.com Git - ovs.git/commitdiff
datapath: netlink: support L3 packets
author: Yang, Yi Y <yi.y.yang@intel.com>
Mon, 6 Feb 2017 13:04:40 +0000 (21:04 +0800)
committer: Joe Stringer <joe@ovn.org>
Thu, 2 Mar 2017 23:51:39 +0000 (15:51 -0800)
Upstream commit:
    commit 0a6410fbde597ebcf82dda4a0b0e889e82242678
    Author: Jiri Benc <jbenc@redhat.com>
    Date:   Thu Nov 10 16:28:22 2016 +0100

    openvswitch: netlink: support L3 packets

    Extend the ovs flow netlink protocol to support L3 packets. Packets without
    OVS_KEY_ATTR_ETHERNET attribute specify L3 packets; for those, the
    OVS_KEY_ATTR_ETHERTYPE attribute is mandatory.

    Push/pop vlan actions are only supported for Ethernet packets.

    Based on previous versions by Lorand Jakab and Simon Horman.

Signed-off-by: Lorand Jakab <lojakab@cisco.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Upstream commit:
    commit 87e159c59d9f325d571689d4027115617adb32e6
    Author: Jarno Rajahalme <jarno@ovn.org>
    Date:   Mon Dec 19 17:06:33 2016 -0800

    openvswitch: Add a missing break statement.

    Add a break statement to prevent fall-through from
    OVS_KEY_ATTR_ETHERNET to OVS_KEY_ATTR_TUNNEL.  Without the break,
    actions setting Ethernet addresses fail to validate, with log messages
    complaining about invalid tunnel attributes.

Fixes: 0a6410fbde ("openvswitch: netlink: support L3 packets")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Upstream commit:
    commit df30f7408b187929dbde72661c7f7c615268f1d0
    Author: pravin shelar <pshelar@ovn.org>
    Date:   Mon Dec 26 08:31:27 2016 -0800

    openvswitch: upcall: Fix vlan handling.

    The networking stack accelerates VLAN tag handling by
    keeping the topmost VLAN header in the skb. This works as
    long as the packet remains in the OVS datapath. But during
    an OVS upcall the VLAN header is pushed onto the packet.
    When such a packet is sent back to the OVS datapath, the core
    networking stack might not handle it correctly. The following
    patch avoids this issue by accelerating the VLAN tag
    during flow key extraction. This simplifies the datapath by
    bringing uniform packet processing to packets from
    all code paths.

Fixes: 5108bbaddc ("openvswitch: add processing of L3 packets").
CC: Jarno Rajahalme <jarno@ovn.org>
CC: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
[Committer Notes]

Squashed in the following upstream commits to retain bisectability:
87e159c59d9f ("openvswitch: Add a missing break statement.")
df30f7408b18 ("openvswitch: upcall: Fix vlan handling.")

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
datapath/datapath.c
datapath/flow.c
datapath/flow_netlink.c

index fefc7953091b53577674eeeb898572ad339d61a3..09fa5f1f4b4dd694dde35393912563db015675de 100644 (file)
@@ -615,7 +615,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;
-       packet->protocol = flow->key.eth.type;
 
        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
index 05940ce369786599a8816c6cd55651c0341bba18..f9a4ddbc70056b530b262075fd71808b8e9afc28 100644 (file)
@@ -311,7 +311,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
  * Returns 0 if it encounters a non-vlan or incomplete packet.
  * Returns 1 after successfully parsing vlan tag.
  */
-static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
+                         bool untag_vlan)
 {
        struct vlan_head *vh = (struct vlan_head *)skb->data;
 
@@ -329,7 +330,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
        key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
        key_vh->tpid = vh->tpid;
 
-       __skb_pull(skb, sizeof(struct vlan_head));
+       if (unlikely(untag_vlan)) {
+               int offset = skb->data - skb_mac_header(skb);
+               u16 tci;
+               int err;
+
+               __skb_push(skb, offset);
+               err = __skb_vlan_pop(skb, &tci);
+               __skb_pull(skb, offset);
+               if (err)
+                       return err;
+               __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
+       } else {
+               __skb_pull(skb, sizeof(struct vlan_head));
+       }
        return 1;
 }
 
@@ -355,13 +369,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
                key->eth.vlan.tpid = skb->vlan_proto;
        } else {
                /* Parse outer vlan tag in the non-accelerated case. */
-               res = parse_vlan_tag(skb, &key->eth.vlan);
+               res = parse_vlan_tag(skb, &key->eth.vlan, true);
                if (res <= 0)
                        return res;
        }
 
        /* Parse inner vlan tag. */
-       res = parse_vlan_tag(skb, &key->eth.cvlan);
+       res = parse_vlan_tag(skb, &key->eth.cvlan, false);
        if (res <= 0)
                return res;
 
@@ -809,29 +823,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
        if (err)
                return err;
 
-       if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
-               /* key_extract assumes that skb->protocol is set-up for
-                * layer 3 packets which is the case for other callers,
-                * in particular packets recieved from the network stack.
-                * Here the correct value can be set from the metadata
-                * extracted above.
-                */
-               skb->protocol = key->eth.type;
-       } else {
-               struct ethhdr *eth;
-
-               skb_reset_mac_header(skb);
-               eth = eth_hdr(skb);
-
-               /* Normally, setting the skb 'protocol' field would be
-                * handled by a call to eth_type_trans(), but it assumes
-                * there's a sending device, which we may not have.
-                */
-               if (eth_proto_is_802_3(eth->h_proto))
-                       skb->protocol = eth->h_proto;
-               else
-                       skb->protocol = htons(ETH_P_802_2);
-       }
+       /* key_extract assumes that skb->protocol is set-up for
+        * layer 3 packets which is the case for other callers,
+        * in particular packets received from the network stack.
+        * Here the correct value can be set from the metadata
+        * extracted above.
+        * For L2 packet key eth type would be zero. skb protocol
+        * would be set to correct value later during key-extract.
+        */
 
+       skb->protocol = key->eth.type;
        return key_extract(skb, key);
 }
index 21137fdcc21f8d6e4345be2d95b89da7df33ae07..9718a5eb79fea74971d3c314861a3569dff0e413 100644 (file)
@@ -124,7 +124,7 @@ static void update_range(struct sw_flow_match *match,
 static bool match_validate(const struct sw_flow_match *match,
                           u64 key_attrs, u64 mask_attrs, bool log)
 {
-       u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
+       u64 key_expected = 0;
        u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 
        /* The following mask attributes allowed only if they
@@ -971,10 +971,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
        return 0;
 }
 
+static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
+                                      u64 *attrs, const struct nlattr **a,
+                                      bool is_mask, bool log)
+{
+       __be16 eth_type;
+
+       eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+       if (is_mask) {
+               /* Always exact match EtherType. */
+               eth_type = htons(0xffff);
+       } else if (!eth_proto_is_802_3(eth_type)) {
+               OVS_NLERR(log, "EtherType %x is less than min %x",
+                               ntohs(eth_type), ETH_P_802_3_MIN);
+               return -EINVAL;
+       }
+
+       SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+       *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+       return 0;
+}
+
 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
                                  u64 *attrs, const struct nlattr **a,
                                 bool is_mask, bool log)
 {
+       u8 mac_proto = MAC_PROTO_ETHERNET;
+
        if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
                u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
 
@@ -1062,9 +1085,19 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
        }
 
+       /* For layer 3 packets the Ethernet type is provided
+        * and treated as metadata but no MAC addresses are provided.
+        */
+       if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
+           (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
+               mac_proto = MAC_PROTO_NONE;
+
        /* Always exact match mac_proto */
-       SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : MAC_PROTO_ETHERNET,
-                       is_mask);
+       SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
+
+       if (mac_proto == MAC_PROTO_NONE)
+               return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
+                                                  log);
 
        return 0;
 }
@@ -1088,33 +1121,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
                SW_FLOW_KEY_MEMCPY(match, eth.dst,
                                eth_key->eth_dst, ETH_ALEN, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
-               /* VLAN attribute is always parsed before getting here since it
-                * may occur multiple times.
-                */
-               OVS_NLERR(log, "VLAN attribute unexpected.");
-               return -EINVAL;
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
-               __be16 eth_type;
 
-               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-               if (is_mask) {
-                       /* Always exact match EtherType. */
-                       eth_type = htons(0xffff);
-               } else if (!eth_proto_is_802_3(eth_type)) {
-                       OVS_NLERR(log, "EtherType %x is less than min %x",
-                                 ntohs(eth_type), ETH_P_802_3_MIN);
+               if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
+                       /* VLAN attribute is always parsed before getting here since it
+                        * may occur multiple times.
+                        */
+                       OVS_NLERR(log, "VLAN attribute unexpected.");
                        return -EINVAL;
                }
 
-               SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
-       } else if (!is_mask) {
-               SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+               if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
+                       err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
+                                                         log);
+                       if (err)
+                               return err;
+               } else if (!is_mask) {
+                       SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+               }
+       } else if (!match->key->eth.type) {
+               OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
+               return -EINVAL;
        }
 
        if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
@@ -1562,42 +1588,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
        if (ovs_ct_put_key(output, skb))
                goto nla_put_failure;
 
-       nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
-       if (!nla)
-               goto nla_put_failure;
-
-       eth_key = nla_data(nla);
-       ether_addr_copy(eth_key->eth_src, output->eth.src);
-       ether_addr_copy(eth_key->eth_dst, output->eth.dst);
-
-       if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
-               if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
+       if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
+               nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+               if (!nla)
                        goto nla_put_failure;
-               encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-               if (!swkey->eth.vlan.tci)
-                       goto unencap;
 
-               if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
-                       if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+               eth_key = nla_data(nla);
+               ether_addr_copy(eth_key->eth_src, output->eth.src);
+               ether_addr_copy(eth_key->eth_dst, output->eth.dst);
+
+               if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
+                       if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
                                goto nla_put_failure;
-                       in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-                       if (!swkey->eth.cvlan.tci)
+                       encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+                       if (!swkey->eth.vlan.tci)
                                goto unencap;
+
+                       if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+                               if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+                                       goto nla_put_failure;
+                               in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+                               if (!swkey->eth.cvlan.tci)
+                                       goto unencap;
+                       }
                }
-       }
 
-       if (swkey->eth.type == htons(ETH_P_802_2)) {
-               /*
-                * Ethertype 802.2 is represented in the netlink with omitted
-                * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
-                * 0xffff in the mask attribute.  Ethertype can also
-                * be wildcarded.
-                */
-               if (is_mask && output->eth.type)
-                       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
-                                               output->eth.type))
-                               goto nla_put_failure;
-               goto unencap;
+               if (swkey->eth.type == htons(ETH_P_802_2)) {
+                       /*
+                        * Ethertype 802.2 is represented in the netlink with omitted
+                        * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
+                        * 0xffff in the mask attribute.  Ethertype can also
+                        * be wildcarded.
+                        */
+                       if (is_mask && output->eth.type)
+                               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+                                                       output->eth.type))
+                                       goto nla_put_failure;
+                       goto unencap;
+               }
        }
 
        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
@@ -2131,8 +2159,8 @@ static bool validate_masked(u8 *data, int len)
 
 static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key,
-                       struct sw_flow_actions **sfa,
-                       bool *skip_copy, __be16 eth_type, bool masked, bool log)
+                       struct sw_flow_actions **sfa, bool *skip_copy,
+                       u8 mac_proto, __be16 eth_type, bool masked, bool log)
 {
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);
@@ -2162,7 +2190,11 @@ static int validate_set(const struct nlattr *a,
        case OVS_KEY_ATTR_SKB_MARK:
        case OVS_KEY_ATTR_CT_MARK:
        case OVS_KEY_ATTR_CT_LABELS:
+               break;
+
        case OVS_KEY_ATTR_ETHERNET:
+               if (mac_proto != MAC_PROTO_ETHERNET)
+                       return -EINVAL;
                break;
 
        case OVS_KEY_ATTR_TUNNEL:
@@ -2333,6 +2365,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                  int depth, struct sw_flow_actions **sfa,
                                  __be16 eth_type, __be16 vlan_tci, bool log)
 {
+       u8 mac_proto = ovs_key_mac_proto(key);
        const struct nlattr *a;
        int rem, err;
 
@@ -2403,10 +2436,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                }
 
                case OVS_ACTION_ATTR_POP_VLAN:
+                       if (mac_proto != MAC_PROTO_ETHERNET)
+                               return -EINVAL;
                        vlan_tci = htons(0);
                        break;
 
                case OVS_ACTION_ATTR_PUSH_VLAN:
+                       if (mac_proto != MAC_PROTO_ETHERNET)
+                               return -EINVAL;
                        vlan = nla_data(a);
                        if (!eth_type_vlan(vlan->vlan_tpid))
                                return -EINVAL;
@@ -2456,14 +2493,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key, sfa,
-                                          &skip_copy, eth_type, false, log);
+                                          &skip_copy, mac_proto, eth_type,
+                                          false, log);
                        if (err)
                                return err;
                        break;
 
                case OVS_ACTION_ATTR_SET_MASKED:
                        err = validate_set(a, key, sfa,
-                                          &skip_copy, eth_type, true, log);
+                                          &skip_copy, mac_proto, eth_type,
+                                          true, log);
                        if (err)
                                return err;
                        break;