OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */
-
#ifdef __KERNEL__
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
#endif
+ OVS_KEY_ATTR_DP_HASH = 20, /* u32 hash value */
+ OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
+
OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
};
+/* Data path hash algorithm selector for computing the datapath hash.
+ *
+ * The algorithm type only specifies which fields in a flow
+ * will be used as part of the hash. Each datapath is free
+ * to use its own hash algorithm. The hash value will be
+ * opaque to the user space daemon.
+ */
+enum ovs_recirc_hash_alg {
+ OVS_RECIRC_HASH_ALG_NONE,
+ OVS_RECIRC_HASH_ALG_L4,
+};
+/**
+ * struct ovs_action_recirc - %OVS_ACTION_ATTR_RECIRC action argument.
+ * @hash_alg: Algorithm used to compute the hash prior to recirculation; one
+ * of enum ovs_recirc_hash_alg.
+ * @hash_bias: Bias used when computing the hash prior to recirculation.
+ * @recirc_id: The recirculation label. Zero is invalid.
+ */
+struct ovs_action_recirc {
+ uint32_t hash_alg; /* One of enum ovs_recirc_hash_alg. */
+ uint32_t hash_bias; /* Bias used when computing the hash. */
+ uint32_t recirc_id; /* Recirculation label. */
+};
+
/**
* enum ovs_action_attr - Action types.
*
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
+ * @OVS_ACTION_ATTR_RECIRC: Recirculate within the data path.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
+ OVS_ACTION_ATTR_RECIRC, /* struct ovs_action_recirc. */
__OVS_ACTION_ATTR_MAX
};
static void
dp_execute_cb(void *aux_, struct ofpbuf *packet,
- const struct pkt_metadata *md OVS_UNUSED,
+ struct pkt_metadata *md,
const struct nlattr *a, bool may_steal)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
}
break;
}
+
+ case OVS_ACTION_ATTR_RECIRC: {
+ const struct ovs_action_recirc *act;
+ act = nl_attr_get(a);
+ md->recirc_id =act->recirc_id;
+ md->dp_hash = 0;
+
+ if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
+ struct flow flow;
+
+ flow_extract(packet, md, &flow);
+ md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
+ }
+
+ dp_netdev_port_input(aux->dp, packet, md);
+ break;
+ }
+
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_MPLS:
* meaningful. */
static void
dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
- const struct pkt_metadata *md,
+ struct pkt_metadata *md,
const struct nlattr *action, bool may_steal OVS_UNUSED)
{
struct dpif_execute_helper_aux *aux = aux_;
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_UNSPEC:
+ case OVS_ACTION_ATTR_RECIRC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
set_arp(packet, nl_attr_get_unspec(a, sizeof(struct ovs_key_arp)));
break;
+ case OVS_KEY_ATTR_DP_HASH:
+ md->dp_hash = nl_attr_get_u32(a);
+ break;
+
+ case OVS_KEY_ATTR_RECIRC_ID:
+ md->recirc_id = nl_attr_get_u32(a);
+ break;
+
case OVS_KEY_ATTR_UNSPEC:
case OVS_KEY_ATTR_ENCAP:
case OVS_KEY_ATTR_ETHERTYPE:
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_USERSPACE:
+ case OVS_ACTION_ATTR_RECIRC:
if (dp_execute_action) {
bool may_steal;
/* Allow 'dp_execute_action' to steal the packet data if we do
struct pkt_metadata;
typedef void (*odp_execute_cb)(void *dp, struct ofpbuf *packet,
- const struct pkt_metadata *,
+ struct pkt_metadata *,
const struct nlattr *action, bool may_steal);
/* Actions that need to be executed in the context of a datapath are handed
case OVS_ACTION_ATTR_POP_VLAN: return 0;
case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls);
case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16);
+ case OVS_ACTION_ATTR_RECIRC: return sizeof(struct ovs_action_recirc);
case OVS_ACTION_ATTR_SET: return -2;
case OVS_ACTION_ATTR_SAMPLE: return -2;
case OVS_KEY_ATTR_ARP: return "arp";
case OVS_KEY_ATTR_ND: return "nd";
case OVS_KEY_ATTR_MPLS: return "mpls";
+ case OVS_KEY_ATTR_DP_HASH: return "dp_hash";
+ case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id";
case __OVS_KEY_ATTR_MAX:
default:
}
}
+/* Appends to 'ds' the textual form of the recirculation action 'act'.
+ *
+ * The output is "recirc(<recirc_id>)", or
+ * "recirc(hash_l4(<hash_bias>), <recirc_id>)" when 'act' selects
+ * OVS_RECIRC_HASH_ALG_L4. */
+static void
+format_odp_recirc_action(struct ds *ds,
+                         const struct ovs_action_recirc *act)
+{
+    /* Constant prefix: no conversion specifiers are involved. */
+    ds_put_cstr(ds, "recirc(");
+
+    /* The hash clause is printed only for the L4 algorithm. */
+    if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
+        ds_put_format(ds, "hash_l4(%"PRIu32"), ", act->hash_bias);
+    }
+
+    /* The recirculation label closes the expression. */
+    ds_put_format(ds, "%"PRIu32")", act->recirc_id);
+}
+
static void
format_odp_action(struct ds *ds, const struct nlattr *a)
{
case OVS_ACTION_ATTR_USERSPACE:
format_odp_userspace_action(ds, a);
break;
+ case OVS_ACTION_ATTR_RECIRC:
+ format_odp_recirc_action(ds, nl_attr_get(a));
+ break;
case OVS_ACTION_ATTR_SET:
ds_put_cstr(ds, "set(");
format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true);
case OVS_KEY_ATTR_ENCAP: return -2;
case OVS_KEY_ATTR_PRIORITY: return 4;
case OVS_KEY_ATTR_SKB_MARK: return 4;
+ case OVS_KEY_ATTR_DP_HASH: return 4;
+ case OVS_KEY_ATTR_RECIRC_ID: return 4;
case OVS_KEY_ATTR_TUNNEL: return -2;
case OVS_KEY_ATTR_IN_PORT: return 4;
case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
+ case OVS_KEY_ATTR_DP_HASH:
+ case OVS_KEY_ATTR_RECIRC_ID:
ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a));
if (!is_exact) {
ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma));
}
break;
}
-
case OVS_KEY_ATTR_UNSPEC:
case __OVS_KEY_ATTR_MAX:
default:
}
}
+ {
+     /* Parses "recirc_id(<id>)" from 's'.  On a match the id is appended
+      * to 'key' and the number of characters consumed is returned.  The
+      * recirculation id is always matched exactly, so an all-ones mask is
+      * appended to 'mask' when a mask buffer was supplied. */
+     uint32_t recirc_id;
+     int n = -1;
+
+     if (ovs_scan(s, "recirc_id(%"SCNi32")%n", &recirc_id, &n)) {
+         nl_msg_put_u32(key, OVS_KEY_ATTR_RECIRC_ID, recirc_id);
+         /* 'mask' is treated as optional by the other attribute parsers
+          * in this function, so it must not be written unconditionally. */
+         if (mask) {
+             nl_msg_put_u32(mask, OVS_KEY_ATTR_RECIRC_ID, UINT32_MAX);
+         }
+         return n;
+     }
+ }
+
+ {
+ uint32_t dp_hash;
+ uint32_t dp_hash_mask;
+ int n = -1;
+
+ if (mask && ovs_scan(s, "dp_hash(%"SCNi32"/%"SCNi32")%n", &dp_hash,
+ &dp_hash_mask, &n)) {
+ nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
+ nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, dp_hash_mask);
+ return n;
+ } else if (ovs_scan(s, "dp_hash(%"SCNi32")%n", &dp_hash, &n)) {
+ nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
+ if (mask) {
+ nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, UINT32_MAX);
+ }
+ return n;
+ }
+ }
+
{
uint64_t tun_id, tun_id_mask;
struct flow_tnl tun_key, tun_key_mask;
nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark);
+ if (flow->recirc_id) {
+ nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id);
+ }
+
+ if (flow->dp_hash) {
+ nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash);
+ }
+
/* Add an ingress port attribute if this is a mask or 'odp_in_port'
* is not the magical value "ODPP_NONE". */
if (is_mask || odp_in_port != ODPP_NONE) {
continue;
}
- if (type == OVS_KEY_ATTR_PRIORITY) {
+ switch (type) {
+ case OVS_KEY_ATTR_RECIRC_ID:
+ md->recirc_id = nl_attr_get_u32(nla);
+ wanted_attrs &= ~(1u << OVS_KEY_ATTR_RECIRC_ID);
+ break;
+ case OVS_KEY_ATTR_DP_HASH:
+ md->dp_hash = nl_attr_get_u32(nla);
+ wanted_attrs &= ~(1u << OVS_KEY_ATTR_DP_HASH);
+ break;
+ case OVS_KEY_ATTR_PRIORITY:
md->skb_priority = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_PRIORITY);
- } else if (type == OVS_KEY_ATTR_SKB_MARK) {
+ break;
+ case OVS_KEY_ATTR_SKB_MARK:
md->pkt_mark = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK);
- } else if (type == OVS_KEY_ATTR_TUNNEL) {
+ break;
+ case OVS_KEY_ATTR_TUNNEL: {
enum odp_key_fitness res;
res = odp_tun_key_from_attr(nla, &md->tunnel);
} else if (res == ODP_FIT_PERFECT) {
wanted_attrs &= ~(1u << OVS_KEY_ATTR_TUNNEL);
}
- } else if (type == OVS_KEY_ATTR_IN_PORT) {
+ break;
+ }
+ case OVS_KEY_ATTR_IN_PORT:
md->in_port.odp_port = nl_attr_get_odp_port(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_IN_PORT);
+ break;
+ default:
+ break;
}
if (!wanted_attrs) {
expected_attrs = 0;
/* Metadata. */
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) {
+ flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]);
+ expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID;
+ } else if (is_mask) {
+ /* Always exact match recirc_id when datapath does not specify it. */
+ flow->recirc_id = UINT32_MAX;
+ }
+
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_DP_HASH)) {
+ flow->dp_hash = nl_attr_get_u32(attrs[OVS_KEY_ATTR_DP_HASH]);
+ expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_DP_HASH;
+ }
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) {
flow->skb_priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]);
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY;
/* Datapath packet metadata */
struct pkt_metadata {
+ uint32_t recirc_id; /* Recirculation id carried with the
+ recirculating packets. 0 for packets
+ received from the wire. */
+ uint32_t dp_hash; /* hash value computed by the recirculation
+ action. */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
uint32_t skb_priority; /* Packet priority for QoS. */
uint32_t pkt_mark; /* Packet mark. */
};
#define PKT_METADATA_INITIALIZER(PORT) \
- (struct pkt_metadata){ { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
+ (struct pkt_metadata){ 0, 0, { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
static inline struct pkt_metadata
pkt_metadata_from_flow(const struct flow *flow)
{
struct pkt_metadata md;
+ md.recirc_id = flow->recirc_id;
+ md.dp_hash = flow->dp_hash;
md.tunnel = flow->tunnel;
md.skb_priority = flow->skb_priority;
md.pkt_mark = flow->pkt_mark;
struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
+/*
+ * Recirculation
+ * =============
+ *
+ * Recirculation is a technique to allow a frame to re-enter the packet
+ * processing path one or more times to achieve more flexible packet
+ * processing in the data path. Example uses are MPLS handling and selecting
+ * the slave port of a bond port.
+ *
+ * Data path and user space interface
+ * -----------------------------------
+ *
+ * Two new fields, recirc_id and dp_hash, are added to the current flow data
+ * structure. They are both of type uint32_t. In addition, a new action,
+ * RECIRC, is added.
+ *
+ * The recirc_id value is used to distinguish between the multiple iterations
+ * of recirculation that a packet goes through. A packet initially received
+ * is considered to have a recirc_id of 0. Recirc_id is managed by user
+ * space and is opaque to the data path.
+ *
+ * On the other hand, dp_hash can only be computed by the data path and is
+ * opaque to user space. In fact, user space may not be able to recompute the
+ * hash value. The dp_hash value should be wildcarded for a newly received
+ * packet. The RECIRC action specifies whether the hash is computed and, if
+ * so, which fields are included in the hash computation. The computed hash
+ * value is stored into the dp_hash field prior to recirculation.
+ *
+ * The RECIRC action computes and sets the dp_hash field, sets the recirc_id
+ * field, and then reprocesses the packet as if it had been received on the
+ * same input port. The RECIRC action works like a function call; actions
+ * listed after the RECIRC action are executed once it returns. RECIRC
+ * actions can be nested; the data path implementation limits the number of
+ * recirculations executed to prevent unreasonable nesting depth or an
+ * infinite loop.
+ *
+ * Both flow fields and the RECIRC action are exposed as OpenFlow fields via
+ * Nicira extensions.
+ *
+ * Post recirculation flow
+ * ------------------------
+ *
+ * At the OpenFlow level, post recirculation rules are always hidden from the
+ * controller. They are installed in table 254, which is set up as a hidden
+ * table during boot time. Those rules are managed by the local user space
+ * program only.
+ *
+ * To speed up the classifier lookup process, recirc_id is always reflected
+ * into the metadata field, since recirc_id is required to be exactly matched.
+ *
+ * Classifier lookup always starts with table 254. A post recirculation flow
+ * lookup should find its hidden rule within this table. On the other hand, a
+ * newly received packet should miss all post recirculation rules because its
+ * recirc_id is zero, and then hit a pre-installed lower priority rule that
+ * redirects the classifier to start its lookup from table 0:
+ *
+ * * , actions=resubmit(,0)
+ *
+ * Post recirculation data path flows are managed like other data path flows.
+ * They are created on demand. Miss handling, stats collection and revalidation
+ * work the same way as regular flows.
+ */
+
uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto);
void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id);
#endif /* ofproto-dpif.h */