/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
#ifndef FLOW_H
#define FLOW_H 1
+#include <linux/cache.h>
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/in6.h>
#include <linux/jiffies.h>
#include <linux/time.h>
-#include <linux/flex_array.h>
-
+#include <linux/cpumask.h>
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
+#include <net/dst_metadata.h>
+#include <net/nsh.h>
struct sk_buff;
-struct sw_flow_mask;
-struct flow_table;
-struct sw_flow_actions {
- struct rcu_head rcu;
- u32 actions_len;
- struct nlattr actions[];
+enum sw_flow_mac_proto {
+ MAC_PROTO_NONE = 0,
+ MAC_PROTO_ETHERNET,
+};
+#define SW_FLOW_KEY_INVALID 0x80
+#define MPLS_LABEL_DEPTH 3
+
+/* Store options at the end of the array if they are less than the
+ * maximum size. This allows us to get the benefits of variable length
+ * matching for small options.
+ */
+#define TUN_METADATA_OFFSET(opt_len) \
+ (sizeof_field(struct sw_flow_key, tun_opts) - opt_len)
+#define TUN_METADATA_OPTS(flow_key, opt_len) \
+ ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
+
+struct ovs_tunnel_info {
+ struct metadata_dst *tun_dst;
+};
+
+struct vlan_head {
+ __be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/
+ __be16 tci; /* 0 if no VLAN, VLAN_CFI_MASK set otherwise. */
};
-/* Used to memset ovs_key_ipv4_tunnel padding. */
-#define OVS_TUNNEL_KEY_SIZE \
- (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \
- FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
+#define OVS_SW_FLOW_KEY_METADATA_SIZE \
+ (offsetof(struct sw_flow_key, recirc_id) + \
+ sizeof_field(struct sw_flow_key, recirc_id))
-struct ovs_key_ipv4_tunnel {
- __be64 tun_id;
- __be32 ipv4_src;
- __be32 ipv4_dst;
- __be16 tun_flags;
- u8 ipv4_tos;
- u8 ipv4_ttl;
+struct ovs_key_nsh {
+ struct ovs_nsh_key_base base;
+ __be32 context[NSH_MD1_CONTEXT_SIZE];
};
struct sw_flow_key {
- struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
+ u8 tun_opts[255];
+ u8 tun_opts_len;
+ struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */
struct {
u32 priority; /* Packet QoS priority. */
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
- } phy;
+ } __packed phy; /* Safe when right after 'tun_key'. */
+ u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */
+ u8 tun_proto; /* Protocol of encapsulating tunnel. */
+ u32 ovs_flow_hash; /* Datapath computed hash value. */
+ u32 recirc_id; /* Recirculation ID. */
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
- __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+ struct vlan_head vlan;
+ struct vlan_head cvlan;
__be16 type; /* Ethernet frame type. */
} eth;
+ /* Filling a hole of two bytes. */
+ u8 ct_state;
+ u8 ct_orig_proto; /* CT original direction tuple IP
+ * protocol.
+ */
+ union {
+ struct {
+ u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
+ u8 tos; /* IP ToS. */
+ u8 ttl; /* IP TTL/hop limit. */
+ u8 frag; /* One of OVS_FRAG_TYPE_*. */
+ } ip;
+ };
+ u16 ct_zone; /* Conntrack zone. */
struct {
- u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
- u8 tos; /* IP ToS. */
- u8 ttl; /* IP TTL/hop limit. */
- u8 frag; /* One of OVS_FRAG_TYPE_*. */
- } ip;
+ __be16 src; /* TCP/UDP/SCTP source port. */
+ __be16 dst; /* TCP/UDP/SCTP destination port. */
+ __be16 flags; /* TCP flags. */
+ } tp;
union {
struct {
struct {
} addr;
union {
struct {
- __be16 src; /* TCP/UDP source port. */
- __be16 dst; /* TCP/UDP destination port. */
- } tp;
+ __be32 src;
+ __be32 dst;
+ } ct_orig; /* Conntrack original direction fields. */
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
struct in6_addr dst; /* IPv6 destination address. */
} addr;
__be32 label; /* IPv6 flow label. */
- struct {
- __be16 src; /* TCP/UDP source port. */
- __be16 dst; /* TCP/UDP destination port. */
- } tp;
- struct {
- struct in6_addr target; /* ND target address. */
- u8 sll[ETH_ALEN]; /* ND source link layer address. */
- u8 tll[ETH_ALEN]; /* ND target link layer address. */
- } nd;
+ union {
+ struct {
+ struct in6_addr src;
+ struct in6_addr dst;
+ } ct_orig; /* Conntrack original direction fields. */
+ struct {
+ struct in6_addr target; /* ND target address. */
+ u8 sll[ETH_ALEN]; /* ND source link layer address. */
+ u8 tll[ETH_ALEN]; /* ND target link layer address. */
+ } nd;
+ };
} ipv6;
+ struct {
+ u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+ __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */
+ } mpls;
+ struct ovs_key_nsh nsh; /* network service header */
};
-};
+ struct {
+ /* Connection tracking fields not packed above. */
+ struct {
+ __be16 src; /* CT orig tuple tp src port. */
+ __be16 dst; /* CT orig tuple tp dst port. */
+ } orig_tp;
+ u32 mark;
+ struct ovs_key_ct_labels labels;
+ } ct;
-struct sw_flow {
- struct rcu_head rcu;
- struct hlist_node hash_node[2];
- u32 hash;
+} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
- struct sw_flow_key key;
- struct sw_flow_key unmasked_key;
- struct sw_flow_mask *mask;
- struct sw_flow_actions __rcu *sf_acts;
-
- spinlock_t lock; /* Lock for values below. */
- unsigned long used; /* Last used time (in jiffies). */
- u64 packet_count; /* Number of packets matched. */
- u64 byte_count; /* Number of bytes matched. */
- u8 tcp_flags; /* Union of seen TCP flags. */
-};
+static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
+{
+ return key->eth.type == htons(ETH_P_IPV6) &&
+ key->ip.proto == NEXTHDR_ICMP &&
+ key->tp.dst == 0 &&
+ (key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT));
+}
struct sw_flow_key_range {
- size_t start;
- size_t end;
+ unsigned short int start;
+ unsigned short int end;
};
-static inline u16 ovs_sw_flow_key_range_actual_size(const struct sw_flow_key_range *range)
-{
- return range->end - range->start;
-}
+struct sw_flow_mask {
+ int ref_count;
+ struct rcu_head rcu;
+ struct sw_flow_key_range range;
+ struct sw_flow_key key;
+};
struct sw_flow_match {
struct sw_flow_key *key;
struct sw_flow_mask *mask;
};
-void ovs_match_init(struct sw_flow_match *match,
- struct sw_flow_key *key, struct sw_flow_mask *mask);
+#define MAX_UFID_LENGTH 16 /* 128 bits */
+
+struct sw_flow_id {
+ u32 ufid_len;
+ union {
+ u32 ufid[MAX_UFID_LENGTH / 4];
+ struct sw_flow_key *unmasked_key;
+ };
+};
+
+struct sw_flow_actions {
+ struct rcu_head rcu;
+ size_t orig_len; /* From flow_cmd_new netlink actions size */
+ u32 actions_len;
+ struct nlattr actions[];
+};
+
+struct sw_flow_stats {
+ u64 packet_count; /* Number of packets matched. */
+ u64 byte_count; /* Number of bytes matched. */
+ unsigned long used; /* Last used time (in jiffies). */
+ spinlock_t lock; /* Lock for atomic stats update. */
+ __be16 tcp_flags; /* Union of seen TCP flags. */
+};
+
+struct sw_flow {
+ struct rcu_head rcu;
+ struct {
+ struct hlist_node node[2];
+ u32 hash;
+ } flow_table, ufid_table;
+ int stats_last_writer; /* CPU id of the last writer on
+ * 'stats[0]'.
+ */
+ struct sw_flow_key key;
+ struct sw_flow_id id;
+ struct cpumask cpu_used_mask;
+ struct sw_flow_mask *mask;
+ struct sw_flow_actions __rcu *sf_acts;
+ struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
+ * is allocated at flow creation time,
+ * the rest are allocated on demand
+ * while holding the 'stats[0].lock'.
+ */
+};
struct arp_eth_header {
__be16 ar_hrd; /* format of hardware address */
unsigned char ar_tip[4]; /* target IP address */
} __packed;
-int ovs_flow_init(void);
-void ovs_flow_exit(void);
-
-struct sw_flow *ovs_flow_alloc(void);
-void ovs_flow_deferred_free(struct sw_flow *);
-void ovs_flow_free(struct sw_flow *, bool deferred);
-
-struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
-void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
-
-int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
-void ovs_flow_used(struct sw_flow *, struct sk_buff *);
-u64 ovs_flow_used_time(unsigned long flow_jiffies);
-int ovs_flow_to_nlattrs(const struct sw_flow_key *,
- const struct sw_flow_key *, struct sk_buff *);
-int ovs_match_from_nlattrs(struct sw_flow_match *match,
- const struct nlattr *,
- const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
- const struct nlattr *attr);
-
-#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-#define TBL_MIN_BUCKETS 1024
-
-struct flow_table {
- struct flex_array *buckets;
- unsigned int count, n_buckets;
- struct rcu_head rcu;
- struct list_head *mask_list;
- int node_ver;
- u32 hash_seed;
- bool keep_flows;
-};
-
-static inline int ovs_flow_tbl_count(struct flow_table *table)
+static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
{
- return table->count;
+ return key->mac_proto & ~SW_FLOW_KEY_INVALID;
}
-static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
+static inline u16 __ovs_mac_header_len(u8 mac_proto)
{
- return (table->count > table->n_buckets);
+ return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0;
}
-struct sw_flow *ovs_flow_lookup(struct flow_table *,
- const struct sw_flow_key *);
-struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
- struct sw_flow_match *match);
-
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
-struct flow_table *ovs_flow_tbl_alloc(int new_size);
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
-struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
-
-void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
-void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
-
-struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
-extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
-int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask);
-int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key,
- const struct ovs_key_ipv4_tunnel *output);
-
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_key *key, int key_len);
-
-struct sw_flow_mask {
- int ref_count;
- struct rcu_head rcu;
- struct list_head list;
- struct sw_flow_key_range range;
- struct sw_flow_key key;
-};
+static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
+{
+ return __ovs_mac_header_len(ovs_key_mac_proto(key));
+}
-static inline u16
-ovs_sw_flow_mask_actual_size(const struct sw_flow_mask *mask)
+static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
{
- return ovs_sw_flow_key_range_actual_size(&mask->range);
+ return sfid->ufid_len;
}
-static inline u16
-ovs_sw_flow_mask_size_roundup(const struct sw_flow_mask *mask)
+static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
{
- return roundup(ovs_sw_flow_mask_actual_size(mask), sizeof(u32));
+ return !ovs_identifier_is_ufid(sfid);
}
-struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
-void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
-void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
-void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
-struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
- const struct sw_flow_mask *);
-void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
- const struct sw_flow_mask *mask);
+void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
+ const struct sk_buff *);
+void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
+ unsigned long *used, __be16 *tcp_flags);
+void ovs_flow_stats_clear(struct sw_flow *);
+u64 ovs_flow_used_time(unsigned long flow_jiffies);
+
+/* Update the non-metadata part of the flow key using skb. */
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
+ struct sk_buff *skb,
+ struct sw_flow_key *key);
+/* Extract key from packet coming from userspace. */
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
+ struct sk_buff *skb,
+ struct sw_flow_key *key, bool log);
+
#endif /* flow.h */