- Linux datapath:
* Support for the kernel versions 4.19.x and 4.20.x.
* Support for the kernel version 5.0.x.
+ * Add support for conntrack zone-based timeout policy.
- 'ovs-dpctl dump-flows' is no longer suitable for dumping offloaded flows.
'ovs-appctl dpctl/dump-flows' should be used instead.
- Add L2 GRE tunnel over IPv6 support.
? dpif->dpif_class->ct_timeout_policy_dump_done(dpif, state)
: EOPNOTSUPP);
}
+
+int
+ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
+ uint16_t dl_type, uint8_t nw_proto,
+ char **tp_name, bool *is_generic)
+{
+ return (dpif->dpif_class->ct_get_timeout_policy_name
+ ? dpif->dpif_class->ct_get_timeout_policy_name(
+ dpif, tp_id, dl_type, nw_proto, tp_name, is_generic)
+ : EOPNOTSUPP);
+}
int ct_dpif_timeout_policy_dump_next(struct dpif *dpif, void *state,
struct ct_dpif_timeout_policy *tp);
int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state);
+int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
+ uint16_t dl_type, uint8_t nw_proto,
+ char **tp_name, bool *is_generic);
#endif /* CT_DPIF_H */
NULL, /* ct_timeout_policy_dump_start */
NULL, /* ct_timeout_policy_dump_next */
NULL, /* ct_timeout_policy_dump_done */
+ NULL, /* ct_get_timeout_policy_name */
dpif_netdev_ipf_set_enabled,
dpif_netdev_ipf_set_min_frag,
dpif_netdev_ipf_set_max_nfrags,
static void
dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num,
- struct ds *tp_name)
+ char **tp_name)
{
- ds_clear(tp_name);
- ds_put_format(tp_name, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
- ct_dpif_format_ipproto(tp_name, l4num);
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ ds_put_format(&ds, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
+ ct_dpif_format_ipproto(&ds, l4num);
if (l3num == AF_INET) {
- ds_put_cstr(tp_name, "4");
+ ds_put_cstr(&ds, "4");
} else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) {
- ds_put_cstr(tp_name, "6");
+ ds_put_cstr(&ds, "6");
}
- ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX);
+ ovs_assert(ds.length < CTNL_TIMEOUT_NAME_MAX);
+
+ *tp_name = ds_steal_cstr(&ds);
+}
+
+static int
+dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
+ uint32_t tp_id, uint16_t dl_type,
+ uint8_t nw_proto, char **tp_name,
+ bool *is_generic)
+{
+ dpif_netlink_format_tp_name(tp_id,
+ dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6,
+ nw_proto, tp_name);
+ *is_generic = false;
+ return 0;
}
#define CT_DPIF_NL_TP_TCP_MAPPINGS \
dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
const struct ct_dpif_timeout_policy *tp)
{
- struct nl_ct_timeout_policy nl_tp;
- struct ds nl_tp_name = DS_EMPTY_INITIALIZER;
int err = 0;
for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
+ struct nl_ct_timeout_policy nl_tp;
+ char *nl_tp_name;
+
dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num,
tp_protos[i].l4num, &nl_tp_name);
- ovs_strlcpy(nl_tp.name, ds_cstr(&nl_tp_name), sizeof nl_tp.name);
+ ovs_strlcpy(nl_tp.name, nl_tp_name, sizeof nl_tp.name);
+ free(nl_tp_name);
+
nl_tp.l3num = tp_protos[i].l3num;
nl_tp.l4num = tp_protos[i].l4num;
dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp);
}
out:
- ds_destroy(&nl_tp_name);
return err;
}
uint32_t tp_id,
struct ct_dpif_timeout_policy *tp)
{
- struct nl_ct_timeout_policy nl_tp;
- struct ds nl_tp_name = DS_EMPTY_INITIALIZER;
int err = 0;
tp->id = tp_id;
tp->present = 0;
for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
+ struct nl_ct_timeout_policy nl_tp;
+ char *nl_tp_name;
+
dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
tp_protos[i].l4num, &nl_tp_name);
- err = nl_ct_get_timeout_policy(ds_cstr(&nl_tp_name), &nl_tp);
+ err = nl_ct_get_timeout_policy(nl_tp_name, &nl_tp);
if (err) {
VLOG_WARN_RL(&error_rl, "failed to get timeout policy %s (%s)",
- nl_tp.name, ovs_strerror(err));
+ nl_tp_name, ovs_strerror(err));
+ free(nl_tp_name);
goto out;
}
+ free(nl_tp_name);
dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp);
}
out:
- ds_destroy(&nl_tp_name);
return err;
}
dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
uint32_t tp_id)
{
- struct ds nl_tp_name = DS_EMPTY_INITIALIZER;
int ret = 0;
for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
+ char *nl_tp_name;
dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
tp_protos[i].l4num, &nl_tp_name);
- int err = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name));
+ int err = nl_ct_del_timeout_policy(nl_tp_name);
if (err == ENOENT) {
err = 0;
}
if (err) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
VLOG_INFO_RL(&rl, "failed to delete timeout policy %s (%s)",
- ds_cstr(&nl_tp_name), ovs_strerror(err));
+ nl_tp_name, ovs_strerror(err));
ret = 1;
}
+ free(nl_tp_name);
}
- ds_destroy(&nl_tp_name);
return ret;
}
dpif_netlink_ct_timeout_policy_dump_start,
dpif_netlink_ct_timeout_policy_dump_next,
dpif_netlink_ct_timeout_policy_dump_done,
+ dpif_netlink_ct_get_timeout_policy_name,
NULL, /* ipf_set_enabled */
NULL, /* ipf_set_min_frag */
NULL, /* ipf_set_max_nfrags */
struct ct_dpif_timeout_policy *tp);
int (*ct_timeout_policy_dump_done)(struct dpif *, void *state);
+ /* Gets timeout policy based on 'tp_id', 'dl_type' and 'nw_proto'.
+ * On success, returns 0, stores the timeout policy name in 'tp_name',
+ * and sets 'is_generic'. 'is_generic' is false if the returned timeout
+ * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the
+ * datapath (e.g., the Linux kernel datapath). Sets 'is_generic' to
+ * true, if the timeout policy supports all OVS supported L3/L4
+ * protocols.
+ *
+ * The caller is responsible for freeing 'tp_name'. */
+ int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id,
+ uint16_t dl_type, uint8_t nw_proto,
+ char **tp_name, bool *is_generic);
+
/* IP Fragmentation. */
/* Disables or enables conntrack fragment reassembly. The default
}
}
+static void
+put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer,
+ const struct flow *flow, struct flow_wildcards *wc,
+ uint16_t zone_id)
+{
+ bool unwildcard;
+ char *tp_name = NULL;
+
+ if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id,
+ ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) {
+ nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, tp_name);
+
+ if (unwildcard) {
+ /* The underlying datapath requires separate timeout
+ * policies for different Ethertypes and IP protocols. We
+ * don't need to unwildcard 'wc->masks.dl_type' since that
+ * field is always unwildcarded in megaflows. */
+ memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
+ }
+ }
+ free(tp_name);
+}
+
static void
put_ct_nat(struct xlate_ctx *ctx)
{
nl_msg_put_u32(ctx->odp_actions, OVS_CT_ATTR_EVENTMASK,
OVS_CT_EVENTMASK_DEFAULT);
}
+ if (ctx->xbridge->support.ct_timeout) {
+ put_ct_timeout(ctx->odp_actions, ctx->xbridge->ofproto->backer,
+ &ctx->xin->flow, ctx->wc, zone);
+ }
}
nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
return supported;
}
+/* Tests whether 'backer''s datapath supports the OVS_CT_ATTR_TIMEOUT
+ * attribute in OVS_ACTION_ATTR_CT. */
+static bool
+check_ct_timeout_policy(struct dpif_backer *backer)
+{
+ struct dpif_execute execute;
+ struct dp_packet packet;
+ struct ofpbuf actions;
+ struct flow flow = {
+ .dl_type = CONSTANT_HTONS(ETH_TYPE_IP),
+ .nw_proto = IPPROTO_UDP,
+ .nw_ttl = 64,
+ /* Use the broadcast address on the loopback address range 127/8 to
+ * avoid hitting any real conntrack entries. We leave the UDP ports to
+ * zeroes for the same purpose. */
+ .nw_src = CONSTANT_HTONL(0x7fffffff),
+ .nw_dst = CONSTANT_HTONL(0x7fffffff),
+ };
+ size_t ct_start;
+ int error;
+
+ /* Compose CT action with timeout policy attribute and check if datapath
+ * can decode the message. */
+ ofpbuf_init(&actions, 64);
+ ct_start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_CT);
+ /* Timeout policy has no effect without the commit flag, but currently the
+ * datapath will accept a timeout policy even without commit. This is
+ * useful as we do not want to persist the probe connection in the
+ * conntrack table. */
+ nl_msg_put_string(&actions, OVS_CT_ATTR_TIMEOUT, "ovs_test_tp");
+ nl_msg_end_nested(&actions, ct_start);
+
+ /* Compose a dummy UDP packet. */
+ dp_packet_init(&packet, 0);
+ flow_compose(&packet, &flow, NULL, 64);
+
+ /* Execute the actions. On older datapaths this fails with EINVAL, on
+ * newer datapaths it succeeds. */
+ execute.actions = actions.data;
+ execute.actions_len = actions.size;
+ execute.packet = &packet;
+ execute.flow = &flow;
+ execute.needs_help = false;
+ execute.probe = true;
+ execute.mtu = 0;
+
+ error = dpif_execute(backer->dpif, &execute);
+
+ dp_packet_uninit(&packet);
+ ofpbuf_uninit(&actions);
+
+ if (error) {
+ VLOG_INFO("%s: Datapath does not support timeout policy in conntrack "
+ "action", dpif_name(backer->dpif));
+ } else {
+ VLOG_INFO("%s: Datapath supports timeout policy in conntrack action",
+ dpif_name(backer->dpif));
+ }
+
+ return !error;
+}
/* Tests whether 'backer''s datapath supports the
* OVS_ACTION_ATTR_CHECK_PKT_LEN action. */
backer->rt_support.ct_clear = check_ct_clear(backer);
backer->rt_support.max_hash_alg = check_max_dp_hash_alg(backer);
backer->rt_support.check_pkt_len = check_check_pkt_len(backer);
+ backer->rt_support.ct_timeout = check_ct_timeout_policy(backer);
/* Flow fields. */
backer->rt_support.odp.ct_state = check_ct_state(backer);
}
}
+/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
+ * 'nw_proto'. Returns true if the zone-based timeout policy is configured.
+ * On success, stores the timeout policy name in 'tp_name', and sets
+ * 'unwildcard' based on the dpif implementation. If 'unwildcard' is true,
+ * the returned timeout policy is 'dl_type' and 'nw_proto' specific, and OVS
+ * needs to unwildcard the datapath flow for this timeout policy in flow
+ * translation.
+ *
+ * The caller is responsible for freeing 'tp_name'. */
+bool
+ofproto_dpif_ct_zone_timeout_policy_get_name(
+ const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type,
+ uint8_t nw_proto, char **tp_name, bool *unwildcard)
+{
+ if (!ct_dpif_timeout_policy_support_ipproto(nw_proto)) {
+ return false;
+ }
+
+ struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
+ if (!ct_zone) {
+ return false;
+ }
+
+ bool is_generic;
+ if (ct_dpif_get_timeout_policy_name(backer->dpif,
+ ct_zone->ct_tp->tp_id, dl_type,
+ nw_proto, tp_name, &is_generic)) {
+ return false;
+ }
+
+ /* Unwildcard datapath flow if it is not a generic timeout policy. */
+ *unwildcard = !is_generic;
+ return true;
+}
+
static bool
set_frag_handling(struct ofproto *ofproto_,
enum ofputil_frag_handling frag_handling)
/* Highest supported dp_hash algorithm. */ \
DPIF_SUPPORT_FIELD(size_t, max_hash_alg, "Max dp_hash algorithm") \
\
- /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */ \
- DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action")
+ /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */ \
+ DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action") \
+ \
+ /* True if the datapath supports OVS_CT_ATTR_TIMEOUT in \
+ * OVS_ACTION_ATTR_CT action. */ \
+ DPIF_SUPPORT_FIELD(bool, ct_timeout, "Conntrack timeout policy")
/* Stores the various features which the corresponding backer supports. */
struct dpif_backer_support {
bool ovs_native_tunneling_is_on(struct ofproto_dpif *);
+bool ofproto_dpif_ct_zone_timeout_policy_get_name(
+ const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type,
+ uint8_t nw_proto, char **tp_name, bool *unwildcard);
+
#endif /* ofproto-dpif.h */
#
m4_define([CHECK_CONNTRACK_NAT])
+# CHECK_CONNTRACK_TIMEOUT()
+#
+# Perform requirements checks for running conntrack customized timeout tests.
+#
+m4_define([CHECK_CONNTRACK_TIMEOUT],
+[
+ AT_SKIP_IF([! cat /boot/config-$(uname -r) | grep NF_CONNTRACK_TIMEOUT | grep '=y' > /dev/null])
+ modprobe nfnetlink_cttimeout
+ on_exit 'modprobe -r nfnetlink_cttimeout'
+])
+
# CHECK_CT_DPIF_PER_ZONE_LIMIT()
#
# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
sublevel=$(uname -r | sed -e 's/\./ /g' | awk '{print $ 2}')
AT_SKIP_IF([ ! ( test $version -lt $1 || ( test $version -eq $1 && test $sublevel -lt $2 ) || test $version -gt $3 || ( test $version -eq $3 && test $sublevel -gt $4 ) ) ])
])
+
+# VSCTL_ADD_DATAPATH_TABLE()
+#
+# Create system datapath table "system" for kernel tests in ovsdb
+m4_define([VSCTL_ADD_DATAPATH_TABLE],
+[
+ AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- set Open_vSwitch . datapaths:"system"=@m], [0], [stdout])
+ DP_TYPE=$(echo "system")
+])
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - zone-based timeout policy])
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_TIMEOUT()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,ip,action=ct(zone=5, table=1)
+priority=100,in_port=2,ip,action=ct(zone=5, table=1)
+table=1,in_port=2,ip,ct_state=+trk+est,action=1
+table=1,in_port=1,ip,ct_state=+trk+new,action=ct(commit,zone=5),2
+table=1,in_port=1,ip,ct_state=+trk+est,action=2
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+dnl Test with default timeout
+dnl The default udp_single and icmp_first timeouts are 30 seconds in
+dnl kernel DP, and 60 seconds in userspace DP.
+
+dnl Send ICMP and UDP traffic
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+
+sleep 4
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack])
+
+dnl Shorten the udp_single and icmp_first timeout in zone 5
+VSCTL_ADD_DATAPATH_TABLE()
+AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
+
+dnl Send ICMP and UDP traffic
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5
+])
+
+dnl Wait until the timeout expire.
+dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
+sleep 4
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_BANNER([conntrack - L7])
AT_SETUP([conntrack - IPv4 HTTP])
#
m4_define([CHECK_CONNTRACK_NAT])
+# CHECK_CONNTRACK_TIMEOUT()
+#
+# Perform requirements checks for running conntrack customized timeout tests.
+* The userspace datapath does not support this feature yet.
+#
+m4_define([CHECK_CONNTRACK_TIMEOUT],
+[
+ AT_SKIP_IF([:])
+])
+
# CHECK_CT_DPIF_PER_ZONE_LIMIT()
#
# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
[
AT_SKIP_IF([:])
])
+
+# VSCTL_ADD_DATAPATH_TABLE()
+#
+# Create datapath table "netdev" for userspace tests in ovsdb
+m4_define([VSCTL_ADD_DATAPATH_TABLE],
+[
+ AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- set Open_vSwitch . datapaths:"netdev"=@m], [0], [stdout])
+ DP_TYPE=$(echo "netdev")
+])