From: Jarno Rajahalme Date: Fri, 13 Jun 2014 17:38:05 +0000 (-0700) Subject: lib/classifier: Optimize megaflows for single rule case. X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=a64759f02d8324caf6c37af0ac4e3e1d26e02a43;p=ovs.git lib/classifier: Optimize megaflows for single rule case. When, during a classifier lookup, we narrow down to a single potential rule, it is enough to match on ("unwildcard") one bit that differs between the packet and the rule. This is a special case of the more general algorithm, where it is sufficient to match on enough bits that separates the packet from all higher priority rules than the matched rule. For a miss that would be all the rules. Implementing this is expensive for a more than a few rules. This patch starts by doing this for a single rule when we already have it, also reducing the lookup cost by finishing the lookup earlier than before. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- diff --git a/lib/classifier.c b/lib/classifier.c index fdf5035c6..7aa389f79 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -1361,19 +1361,29 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, * value has the correct value in 'target'. * * This function is equivalent to miniflow_equal_flow_in_minimask(flow, - * target, mask) but it is faster because of the invariant that - * flow->map and mask->masks.map are the same. */ + * target, mask) but this is faster because of the invariant that + * flow->map and mask->masks.map are the same, and that this version + * takes the 'wc'. */ static inline bool miniflow_and_mask_matches_flow(const struct miniflow *flow, const struct minimask *mask, - const struct flow *target) + const struct flow *target, + struct flow_wildcards *wc) { const uint32_t *flowp = miniflow_get_u32_values(flow); const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); - uint32_t target_u32; + uint32_t idx; - FLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) { - if ((*flowp++ ^ target_u32) & *maskp++) { + MAP_FOR_EACH_INDEX(idx, mask->masks.map) { + uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++; + + if (diff) { + /* Only unwildcard if none of the differing bits is already + * exact-matched. */ + if (wc && !(flow_u32_value(&wc->masks, idx) & diff)) { + /* Keep one bit of the difference. */ + *flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff); + } return false; } } @@ -1389,7 +1399,7 @@ find_match(const struct cls_subtable *subtable, const struct flow *flow, HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) { if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask, - flow)) { + flow, NULL)) { return rule; } } @@ -1403,7 +1413,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, struct flow_wildcards *wc) { uint32_t basis = 0, hash; - struct cls_match *rule = NULL; + struct cls_match *rule; int i; struct range ofs; @@ -1433,23 +1443,17 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, } /* If we have narrowed down to a single rule already, check whether - * that rule matches. If it does match, then we're done. If it does - * not match, then we know that we will never get a match, but we do - * not yet know how many wildcards we need to fold into 'wc' so we - * continue iterating through indices to find that out. (We won't - * waste time calling miniflow_and_mask_matches_flow() again because - * we've set 'rule' nonnull.) - * - * This check shows a measurable benefit with non-trivial flow tables. + * that rule matches. Either way, we're done. * * (Rare) hash collisions may cause us to miss the opportunity for this * optimization. */ - if (!inode->s && !rule) { + if (!inode->s) { ASSIGN_CONTAINER(rule, inode - i, index_nodes); if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask, - flow)) { + flow, wc)) { goto out; } + goto range_out; } } ofs.end = FLOW_U32S; @@ -1457,16 +1461,9 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) { goto range_out; } - if (!rule) { - /* Multiple potential matches exist, look for one. */ - hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, - ofs.end, &basis); - rule = find_match(subtable, flow, hash); - } else { - /* We already narrowed the matching candidates down to just 'rule', - * but it didn't match. */ - rule = NULL; - } + hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, + ofs.end, &basis); + rule = find_match(subtable, flow, hash); if (!rule && subtable->ports_mask_len) { /* Ports are always part of the final range, if any. * No match was found for the ports. Use the ports trie to figure out @@ -1484,12 +1481,12 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, ofs.start = TP_PORTS_OFS32; goto range_out; } - out: +out: /* Must unwildcard all the fields, as they were looked at. */ flow_wildcards_fold_minimask(wc, &subtable->mask); return rule; - range_out: +range_out: /* Must unwildcard the fields looked up so far, if any. */ if (ofs.start) { flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, ofs.start); diff --git a/lib/flow.h b/lib/flow.h index 2ca7e27ff..1bce46c56 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -413,11 +413,21 @@ void miniflow_destroy(struct miniflow *); void miniflow_expand(const struct miniflow *, struct flow *); +static inline uint32_t flow_u32_value(const struct flow *flow, size_t index) +{ + return ((uint32_t *)(flow))[index]; +} + +static inline uint32_t *flow_u32_lvalue(struct flow *flow, size_t index) +{ + return &((uint32_t *)(flow))[index]; +} + static inline bool flow_get_next_in_map(const struct flow *flow, uint64_t map, uint32_t *value) { if (map) { - *value = ((const uint32_t *)flow)[raw_ctz(map)]; + *value = flow_u32_value(flow, raw_ctz(map)); return true; } return false; @@ -429,6 +439,12 @@ flow_get_next_in_map(const struct flow *flow, uint64_t map, uint32_t *value) flow_get_next_in_map(FLOW, map__, &(VALUE)); \ map__ = zero_rightmost_1bit(map__)) +/* Iterate through all struct flow u32 indices specified by 'MAP'. */ +#define MAP_FOR_EACH_INDEX(U32IDX, MAP) \ + for (uint64_t map__ = (MAP); \ + ((U32IDX) = ctz64(map__)) < FLOW_U32S; \ + map__ = zero_rightmost_1bit(map__)) + #define FLOW_U32_SIZE(FIELD) \ DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint32_t)) diff --git a/tests/classifier.at b/tests/classifier.at index 7bd049351..710060fc4 100644 --- a/tests/classifier.at +++ b/tests/classifier.at @@ -45,7 +45,7 @@ Datapath actions: 1 ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=0.0.0.0/2.0.0.0,nw_frag=no Datapath actions: drop ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) @@ -55,7 +55,7 @@ Datapath actions: drop ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0 + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x1/0x1 Datapath actions: 2 ]) OVS_VSWITCHD_STOP @@ -83,7 +83,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) # nw_dst and nw_src should be on by default AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no Datapath actions: drop ]) @@ -98,7 +98,7 @@ AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=nw_dst,nw_dst], [1], [], AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=nw_dst], [0]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,ip,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no Datapath actions: drop ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) @@ -113,7 +113,7 @@ Datapath actions: drop ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0 + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0x1,tp_dst=0x40/0xfff0 Datapath actions: 3 ]) AT_CHECK([ovs-vsctl set Flow_Table t0 prefixes=none], [0]) diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 6506ece02..f6e788b92 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -4785,7 +4785,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: sleep 1 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: -skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: +skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:02,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -4803,7 +4803,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) sleep 1 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl -skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: +skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.2,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:00,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/255.255.255.255,dst=10.0.0.3/0.0.0.0,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: ]) OVS_VSWITCHD_STOP @@ -4823,7 +4823,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: sleep 1 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:1:2:3:4:5/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff,dst=fe80::2/::,label=0/0,proto=10/0,tclass=0x70/0,hlimit=128/0,frag=no/0xff), actions: -skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:5:4:3:2:1/ffff:ffff:ffff:fffc::,dst=2001:db8:3c4d:1:2:3:4:1/::,label=0/0,proto=99/0,tclass=0x70/0,hlimit=64/0,frag=no/0xff), actions: +skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x86dd),ipv6(src=2001:db8:3c4d:5:4:3:2:1/0:0:0:4::,dst=2001:db8:3c4d:1:2:3:4:1/::,label=0/0,proto=99/0,tclass=0x70/0,hlimit=64/0,frag=no/0xff), actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -5007,7 +5007,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: sleep 1 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: -skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/ff:ff:ff:ff:ff:ff,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: +skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:02,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4/0.0.0.0,dst=10.0.0.3/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -5208,7 +5208,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) sleep 1 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl -skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: +skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.2,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8/0,code=0/0), actions: skb_priority(0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:0b/00:00:00:00:00:00,dst=50:54:00:00:00:0c/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8/0,code=0/0), actions: ]) OVS_VSWITCHD_STOP