/*
 * Copyright (c) 2016 Mellanox Technologies, Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "netdev-tc-offloads.h"

#include <errno.h>
#include <linux/if_ether.h>
#include <stdlib.h>

#include "openvswitch/hmap.h"
#include "openvswitch/match.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/thread.h"
#include "openvswitch/types.h"
#include "openvswitch/util.h"
#include "openvswitch/vlog.h"
#include "netdev-linux.h"
#include "netlink-socket.h"
#include "odp-netlink.h"
#include "unaligned.h"

41 VLOG_DEFINE_THIS_MODULE(netdev_tc_offloads
);
43 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(60, 5);
45 static struct hmap ufid_tc
= HMAP_INITIALIZER(&ufid_tc
);
46 static bool multi_mask_per_prio
= false;
47 static bool block_support
= false;
/* One slot of set_flower_map: ties a field of an OVS key attribute to the
 * corresponding field of 'struct tc_flower_key'.
 *
 * NOTE(review): the member list was missing from the damaged source and is
 * restored from the uses 'f->offset', 'f->flower_offset' and 'f->size';
 * confirm the member types. */
struct netlink_field {
    int offset;         /* Byte offset inside the OVS key attribute. */
    int flower_offset;  /* Byte offset inside struct tc_flower_key. */
    int size;           /* Field size in bytes; 0 marks an unused slot. */
};

55 static struct netlink_field set_flower_map
[][3] = {
56 [OVS_KEY_ATTR_IPV4
] = {
57 { offsetof(struct ovs_key_ipv4
, ipv4_src
),
58 offsetof(struct tc_flower_key
, ipv4
.ipv4_src
),
59 MEMBER_SIZEOF(struct tc_flower_key
, ipv4
.ipv4_src
)
61 { offsetof(struct ovs_key_ipv4
, ipv4_dst
),
62 offsetof(struct tc_flower_key
, ipv4
.ipv4_dst
),
63 MEMBER_SIZEOF(struct tc_flower_key
, ipv4
.ipv4_dst
)
65 { offsetof(struct ovs_key_ipv4
, ipv4_ttl
),
66 offsetof(struct tc_flower_key
, ipv4
.rewrite_ttl
),
67 MEMBER_SIZEOF(struct tc_flower_key
, ipv4
.rewrite_ttl
)
70 [OVS_KEY_ATTR_IPV6
] = {
71 { offsetof(struct ovs_key_ipv6
, ipv6_src
),
72 offsetof(struct tc_flower_key
, ipv6
.ipv6_src
),
73 MEMBER_SIZEOF(struct tc_flower_key
, ipv6
.ipv6_src
)
75 { offsetof(struct ovs_key_ipv6
, ipv6_dst
),
76 offsetof(struct tc_flower_key
, ipv6
.ipv6_dst
),
77 MEMBER_SIZEOF(struct tc_flower_key
, ipv6
.ipv6_dst
)
80 [OVS_KEY_ATTR_ETHERNET
] = {
81 { offsetof(struct ovs_key_ethernet
, eth_src
),
82 offsetof(struct tc_flower_key
, src_mac
),
83 MEMBER_SIZEOF(struct tc_flower_key
, src_mac
)
85 { offsetof(struct ovs_key_ethernet
, eth_dst
),
86 offsetof(struct tc_flower_key
, dst_mac
),
87 MEMBER_SIZEOF(struct tc_flower_key
, dst_mac
)
90 [OVS_KEY_ATTR_ETHERTYPE
] = {
92 offsetof(struct tc_flower_key
, eth_type
),
93 MEMBER_SIZEOF(struct tc_flower_key
, eth_type
)
96 [OVS_KEY_ATTR_TCP
] = {
97 { offsetof(struct ovs_key_tcp
, tcp_src
),
98 offsetof(struct tc_flower_key
, tcp_src
),
99 MEMBER_SIZEOF(struct tc_flower_key
, tcp_src
)
101 { offsetof(struct ovs_key_tcp
, tcp_dst
),
102 offsetof(struct tc_flower_key
, tcp_dst
),
103 MEMBER_SIZEOF(struct tc_flower_key
, tcp_dst
)
106 [OVS_KEY_ATTR_UDP
] = {
107 { offsetof(struct ovs_key_udp
, udp_src
),
108 offsetof(struct tc_flower_key
, udp_src
),
109 MEMBER_SIZEOF(struct tc_flower_key
, udp_src
)
111 { offsetof(struct ovs_key_udp
, udp_dst
),
112 offsetof(struct tc_flower_key
, udp_dst
),
113 MEMBER_SIZEOF(struct tc_flower_key
, udp_dst
)
118 static struct ovs_mutex ufid_lock
= OVS_MUTEX_INITIALIZER
;
121 * struct ufid_tc_data - data entry for ufid_tc hmap.
122 * @ufid_node: Element in @ufid_tc hash table by ufid key.
123 * @tc_node: Element in @ufid_tc hash table by prio/handle/ifindex key.
124 * @ufid: ufid assigned to the flow
127 * @ifindex: netdev ifindex.
128 * @netdev: netdev associated with the tc rule
130 struct ufid_tc_data
{
131 struct hmap_node ufid_node
;
132 struct hmap_node tc_node
;
137 struct netdev
*netdev
;
140 /* Remove matching ufid entry from ufid_tc hashmap. */
142 del_ufid_tc_mapping(const ovs_u128
*ufid
)
144 size_t ufid_hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
145 struct ufid_tc_data
*data
;
147 ovs_mutex_lock(&ufid_lock
);
148 HMAP_FOR_EACH_WITH_HASH(data
, ufid_node
, ufid_hash
, &ufid_tc
) {
149 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
155 ovs_mutex_unlock(&ufid_lock
);
159 hmap_remove(&ufid_tc
, &data
->ufid_node
);
160 hmap_remove(&ufid_tc
, &data
->tc_node
);
161 netdev_close(data
->netdev
);
163 ovs_mutex_unlock(&ufid_lock
);
166 /* Add ufid entry to ufid_tc hashmap.
167 * If entry exists already it will be replaced. */
169 add_ufid_tc_mapping(const ovs_u128
*ufid
, int prio
, int handle
,
170 struct netdev
*netdev
, int ifindex
)
172 size_t ufid_hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
173 size_t tc_hash
= hash_int(hash_int(prio
, handle
), ifindex
);
174 struct ufid_tc_data
*new_data
= xzalloc(sizeof *new_data
);
176 del_ufid_tc_mapping(ufid
);
178 new_data
->ufid
= *ufid
;
179 new_data
->prio
= prio
;
180 new_data
->handle
= handle
;
181 new_data
->netdev
= netdev_ref(netdev
);
182 new_data
->ifindex
= ifindex
;
184 ovs_mutex_lock(&ufid_lock
);
185 hmap_insert(&ufid_tc
, &new_data
->ufid_node
, ufid_hash
);
186 hmap_insert(&ufid_tc
, &new_data
->tc_node
, tc_hash
);
187 ovs_mutex_unlock(&ufid_lock
);
190 /* Get ufid from ufid_tc hashmap.
192 * If netdev output param is not NULL then the function will return
193 * associated netdev on success and a refcount is taken on that netdev.
194 * The caller is then responsible to close the netdev.
196 * Returns handle if successful and fill prio and netdev for that ufid.
197 * Otherwise returns 0.
200 get_ufid_tc_mapping(const ovs_u128
*ufid
, int *prio
, struct netdev
**netdev
)
202 size_t ufid_hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
203 struct ufid_tc_data
*data
;
206 ovs_mutex_lock(&ufid_lock
);
207 HMAP_FOR_EACH_WITH_HASH(data
, ufid_node
, ufid_hash
, &ufid_tc
) {
208 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
213 *netdev
= netdev_ref(data
->netdev
);
215 handle
= data
->handle
;
219 ovs_mutex_unlock(&ufid_lock
);
224 /* Find ufid entry in ufid_tc hashmap using prio, handle and netdev.
225 * The result is saved in ufid.
227 * Returns true on success.
230 find_ufid(int prio
, int handle
, struct netdev
*netdev
, ovs_u128
*ufid
)
232 int ifindex
= netdev_get_ifindex(netdev
);
233 struct ufid_tc_data
*data
;
234 size_t tc_hash
= hash_int(hash_int(prio
, handle
), ifindex
);
236 ovs_mutex_lock(&ufid_lock
);
237 HMAP_FOR_EACH_WITH_HASH(data
, tc_node
, tc_hash
, &ufid_tc
) {
238 if (data
->prio
== prio
&& data
->handle
== handle
239 && data
->ifindex
== ifindex
) {
244 ovs_mutex_unlock(&ufid_lock
);
246 return (data
!= NULL
);
249 struct prio_map_data
{
250 struct hmap_node node
;
251 struct tc_flower_key mask
;
256 /* Get free prio for tc flower
257 * If prio is already allocated for mask/eth_type combination then return it.
258 * If not assign new prio.
260 * Return prio on success or 0 if we are out of prios.
263 get_prio_for_tc_flower(struct tc_flower
*flower
)
265 static struct hmap prios
= HMAP_INITIALIZER(&prios
);
266 static struct ovs_mutex prios_lock
= OVS_MUTEX_INITIALIZER
;
267 static uint16_t last_prio
= 0;
268 size_t key_len
= sizeof(struct tc_flower_key
);
269 size_t hash
= hash_int((OVS_FORCE
uint32_t) flower
->key
.eth_type
, 0);
270 struct prio_map_data
*data
;
271 struct prio_map_data
*new_data
;
273 if (!multi_mask_per_prio
) {
274 hash
= hash_bytes(&flower
->mask
, key_len
, hash
);
277 /* We can use the same prio for same mask/eth combination but must have
278 * different prio if not. Flower classifier will reject same prio for
279 * different mask combination unless multi mask per prio is supported. */
280 ovs_mutex_lock(&prios_lock
);
281 HMAP_FOR_EACH_WITH_HASH(data
, node
, hash
, &prios
) {
282 if ((multi_mask_per_prio
283 || !memcmp(&flower
->mask
, &data
->mask
, key_len
))
284 && data
->protocol
== flower
->key
.eth_type
) {
285 ovs_mutex_unlock(&prios_lock
);
290 if (last_prio
== UINT16_MAX
) {
291 /* last_prio can overflow if there will be many different kinds of
292 * flows which shouldn't happen organically. */
293 ovs_mutex_unlock(&prios_lock
);
297 new_data
= xzalloc(sizeof *new_data
);
298 memcpy(&new_data
->mask
, &flower
->mask
, key_len
);
299 new_data
->prio
= ++last_prio
;
300 new_data
->protocol
= flower
->key
.eth_type
;
301 hmap_insert(&prios
, &new_data
->node
, hash
);
302 ovs_mutex_unlock(&prios_lock
);
304 return new_data
->prio
;
308 get_block_id_from_netdev(struct netdev
*netdev
)
311 return netdev_get_block_id(netdev
);
318 netdev_tc_flow_flush(struct netdev
*netdev
)
320 int ifindex
= netdev_get_ifindex(netdev
);
321 uint32_t block_id
= 0;
324 VLOG_ERR_RL(&error_rl
, "flow_flush: failed to get ifindex for %s: %s",
325 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
329 block_id
= get_block_id_from_netdev(netdev
);
331 return tc_flush(ifindex
, block_id
);
335 netdev_tc_flow_dump_create(struct netdev
*netdev
,
336 struct netdev_flow_dump
**dump_out
)
338 struct netdev_flow_dump
*dump
;
339 uint32_t block_id
= 0;
342 ifindex
= netdev_get_ifindex(netdev
);
344 VLOG_ERR_RL(&error_rl
, "dump_create: failed to get ifindex for %s: %s",
345 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
349 block_id
= get_block_id_from_netdev(netdev
);
350 dump
= xzalloc(sizeof *dump
);
351 dump
->nl_dump
= xzalloc(sizeof *dump
->nl_dump
);
352 dump
->netdev
= netdev_ref(netdev
);
353 tc_dump_flower_start(ifindex
, dump
->nl_dump
, block_id
);
361 netdev_tc_flow_dump_destroy(struct netdev_flow_dump
*dump
)
363 nl_dump_done(dump
->nl_dump
);
364 netdev_close(dump
->netdev
);
371 parse_flower_rewrite_to_netlink_action(struct ofpbuf
*buf
,
372 struct tc_flower
*flower
)
374 char *mask
= (char *) &flower
->rewrite
.mask
;
375 char *data
= (char *) &flower
->rewrite
.key
;
377 for (int type
= 0; type
< ARRAY_SIZE(set_flower_map
); type
++) {
380 int len
= ovs_flow_key_attr_lens
[type
].len
;
386 for (int j
= 0; j
< ARRAY_SIZE(set_flower_map
[type
]); j
++) {
387 struct netlink_field
*f
= &set_flower_map
[type
][j
];
393 if (!is_all_zeros(mask
+ f
->flower_offset
, f
->size
)) {
395 nested
= nl_msg_start_nested(buf
,
396 OVS_ACTION_ATTR_SET_MASKED
);
397 put
= nl_msg_put_unspec_zero(buf
, type
, len
* 2);
400 memcpy(put
+ f
->offset
, data
+ f
->flower_offset
, f
->size
);
401 memcpy(put
+ len
+ f
->offset
,
402 mask
+ f
->flower_offset
, f
->size
);
407 nl_msg_end_nested(buf
, nested
);
413 parse_tc_flower_to_match(struct tc_flower
*flower
,
415 struct nlattr
**actions
,
416 struct dpif_flow_stats
*stats
,
417 struct dpif_flow_attrs
*attrs
,
421 struct tc_flower_key
*key
= &flower
->key
;
422 struct tc_flower_key
*mask
= &flower
->mask
;
423 odp_port_t outport
= 0;
424 struct tc_action
*action
;
429 match_init_catchall(match
);
430 match_set_dl_src_masked(match
, key
->src_mac
, mask
->src_mac
);
431 match_set_dl_dst_masked(match
, key
->dst_mac
, mask
->dst_mac
);
433 if (eth_type_vlan(key
->eth_type
)) {
434 match_set_dl_vlan(match
, htons(key
->vlan_id
));
435 match_set_dl_vlan_pcp(match
, key
->vlan_prio
);
436 match_set_dl_type(match
, key
->encap_eth_type
);
437 flow_fix_vlan_tpid(&match
->flow
);
439 match_set_dl_type(match
, key
->eth_type
);
442 if (is_ip_any(&match
->flow
)) {
444 match_set_nw_proto(match
, key
->ip_proto
);
447 match_set_nw_ttl_masked(match
, key
->ip_ttl
, mask
->ip_ttl
);
451 uint8_t flags_mask
= 0;
453 if (mask
->flags
& TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
) {
454 if (key
->flags
& TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
) {
455 flags
|= FLOW_NW_FRAG_ANY
;
457 flags_mask
|= FLOW_NW_FRAG_ANY
;
460 if (mask
->flags
& TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
) {
461 if (!(key
->flags
& TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
)) {
462 flags
|= FLOW_NW_FRAG_LATER
;
464 flags_mask
|= FLOW_NW_FRAG_LATER
;
467 match_set_nw_frag_masked(match
, flags
, flags_mask
);
470 match_set_nw_src_masked(match
, key
->ipv4
.ipv4_src
, mask
->ipv4
.ipv4_src
);
471 match_set_nw_dst_masked(match
, key
->ipv4
.ipv4_dst
, mask
->ipv4
.ipv4_dst
);
473 match_set_ipv6_src_masked(match
,
474 &key
->ipv6
.ipv6_src
, &mask
->ipv6
.ipv6_src
);
475 match_set_ipv6_dst_masked(match
,
476 &key
->ipv6
.ipv6_dst
, &mask
->ipv6
.ipv6_dst
);
478 if (key
->ip_proto
== IPPROTO_TCP
) {
479 match_set_tp_dst_masked(match
, key
->tcp_dst
, mask
->tcp_dst
);
480 match_set_tp_src_masked(match
, key
->tcp_src
, mask
->tcp_src
);
481 match_set_tcp_flags_masked(match
, key
->tcp_flags
, mask
->tcp_flags
);
482 } else if (key
->ip_proto
== IPPROTO_UDP
) {
483 match_set_tp_dst_masked(match
, key
->udp_dst
, mask
->udp_dst
);
484 match_set_tp_src_masked(match
, key
->udp_src
, mask
->udp_src
);
485 } else if (key
->ip_proto
== IPPROTO_SCTP
) {
486 match_set_tp_dst_masked(match
, key
->sctp_dst
, mask
->sctp_dst
);
487 match_set_tp_src_masked(match
, key
->sctp_src
, mask
->sctp_src
);
491 if (flower
->tunnel
.tunnel
) {
492 match_set_tun_id(match
, flower
->tunnel
.id
);
493 if (flower
->tunnel
.ipv4
.ipv4_dst
) {
494 match_set_tun_src(match
, flower
->tunnel
.ipv4
.ipv4_src
);
495 match_set_tun_dst(match
, flower
->tunnel
.ipv4
.ipv4_dst
);
496 } else if (!is_all_zeros(&flower
->tunnel
.ipv6
.ipv6_dst
,
497 sizeof flower
->tunnel
.ipv6
.ipv6_dst
)) {
498 match_set_tun_ipv6_src(match
, &flower
->tunnel
.ipv6
.ipv6_src
);
499 match_set_tun_ipv6_dst(match
, &flower
->tunnel
.ipv6
.ipv6_dst
);
501 if (flower
->tunnel
.tp_dst
) {
502 match_set_tun_tp_dst(match
, flower
->tunnel
.tp_dst
);
506 act_off
= nl_msg_start_nested(buf
, OVS_FLOW_ATTR_ACTIONS
);
508 action
= flower
->actions
;
509 for (i
= 0; i
< flower
->action_count
; i
++, action
++) {
510 switch (action
->type
) {
511 case TC_ACT_VLAN_POP
: {
512 nl_msg_put_flag(buf
, OVS_ACTION_ATTR_POP_VLAN
);
515 case TC_ACT_VLAN_PUSH
: {
516 struct ovs_action_push_vlan
*push
;
518 push
= nl_msg_put_unspec_zero(buf
, OVS_ACTION_ATTR_PUSH_VLAN
,
520 push
->vlan_tpid
= action
->vlan
.vlan_push_tpid
;
521 push
->vlan_tci
= htons(action
->vlan
.vlan_push_id
522 | (action
->vlan
.vlan_push_prio
<< 13)
527 parse_flower_rewrite_to_netlink_action(buf
, flower
);
531 size_t set_offset
= nl_msg_start_nested(buf
, OVS_ACTION_ATTR_SET
);
532 size_t tunnel_offset
=
533 nl_msg_start_nested(buf
, OVS_KEY_ATTR_TUNNEL
);
535 nl_msg_put_be64(buf
, OVS_TUNNEL_KEY_ATTR_ID
, action
->encap
.id
);
536 if (action
->encap
.ipv4
.ipv4_src
) {
537 nl_msg_put_be32(buf
, OVS_TUNNEL_KEY_ATTR_IPV4_SRC
,
538 action
->encap
.ipv4
.ipv4_src
);
540 if (action
->encap
.ipv4
.ipv4_dst
) {
541 nl_msg_put_be32(buf
, OVS_TUNNEL_KEY_ATTR_IPV4_DST
,
542 action
->encap
.ipv4
.ipv4_dst
);
544 if (!is_all_zeros(&action
->encap
.ipv6
.ipv6_src
,
545 sizeof action
->encap
.ipv6
.ipv6_src
)) {
546 nl_msg_put_in6_addr(buf
, OVS_TUNNEL_KEY_ATTR_IPV6_SRC
,
547 &action
->encap
.ipv6
.ipv6_src
);
549 if (!is_all_zeros(&action
->encap
.ipv6
.ipv6_dst
,
550 sizeof action
->encap
.ipv6
.ipv6_dst
)) {
551 nl_msg_put_in6_addr(buf
, OVS_TUNNEL_KEY_ATTR_IPV6_DST
,
552 &action
->encap
.ipv6
.ipv6_dst
);
554 nl_msg_put_be16(buf
, OVS_TUNNEL_KEY_ATTR_TP_DST
,
555 action
->encap
.tp_dst
);
557 nl_msg_end_nested(buf
, tunnel_offset
);
558 nl_msg_end_nested(buf
, set_offset
);
561 case TC_ACT_OUTPUT
: {
562 if (action
->ifindex_out
) {
563 outport
= netdev_ifindex_to_odp_port(action
->ifindex_out
);
568 nl_msg_put_u32(buf
, OVS_ACTION_ATTR_OUTPUT
, odp_to_u32(outport
));
574 nl_msg_end_nested(buf
, act_off
);
576 *actions
= ofpbuf_at_assert(buf
, act_off
, sizeof(struct nlattr
));
579 memset(stats
, 0, sizeof *stats
);
580 stats
->n_packets
= get_32aligned_u64(&flower
->stats
.n_packets
);
581 stats
->n_bytes
= get_32aligned_u64(&flower
->stats
.n_bytes
);
582 stats
->used
= flower
->lastused
;
585 attrs
->offloaded
= (flower
->offloaded_state
== TC_OFFLOADED_STATE_IN_HW
)
586 || (flower
->offloaded_state
== TC_OFFLOADED_STATE_UNDEFINED
);
587 attrs
->dp_layer
= "tc";
593 netdev_tc_flow_dump_next(struct netdev_flow_dump
*dump
,
595 struct nlattr
**actions
,
596 struct dpif_flow_stats
*stats
,
597 struct dpif_flow_attrs
*attrs
,
599 struct ofpbuf
*rbuffer
,
600 struct ofpbuf
*wbuffer
)
602 struct ofpbuf nl_flow
;
604 while (nl_dump_next(dump
->nl_dump
, &nl_flow
, rbuffer
)) {
605 struct tc_flower flower
;
606 struct netdev
*netdev
= dump
->netdev
;
608 if (parse_netlink_to_tc_flower(&nl_flow
, &flower
)) {
612 if (parse_tc_flower_to_match(&flower
, match
, actions
, stats
, attrs
,
617 if (flower
.act_cookie
.len
) {
618 *ufid
= *((ovs_u128
*) flower
.act_cookie
.data
);
619 } else if (!find_ufid(flower
.prio
, flower
.handle
, netdev
, ufid
)) {
623 match
->wc
.masks
.in_port
.odp_port
= u32_to_odp(UINT32_MAX
);
624 match
->flow
.in_port
.odp_port
= dump
->port
;
633 parse_put_flow_set_masked_action(struct tc_flower
*flower
,
634 struct tc_action
*action
,
635 const struct nlattr
*set
,
639 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
640 uint64_t set_stub
[1024 / 8];
641 struct ofpbuf set_buf
= OFPBUF_STUB_INITIALIZER(set_stub
);
642 char *set_data
, *set_mask
;
643 char *key
= (char *) &flower
->rewrite
.key
;
644 char *mask
= (char *) &flower
->rewrite
.mask
;
645 const struct nlattr
*attr
;
649 /* copy so we can set attr mask to 0 for used ovs key struct members */
650 attr
= ofpbuf_put(&set_buf
, set
, set_len
);
652 type
= nl_attr_type(attr
);
653 size
= nl_attr_get_size(attr
) / 2;
654 set_data
= CONST_CAST(char *, nl_attr_get(attr
));
655 set_mask
= set_data
+ size
;
657 if (type
>= ARRAY_SIZE(set_flower_map
)
658 || !set_flower_map
[type
][0].size
) {
659 VLOG_DBG_RL(&rl
, "unsupported set action type: %d", type
);
660 ofpbuf_uninit(&set_buf
);
664 for (i
= 0; i
< ARRAY_SIZE(set_flower_map
[type
]); i
++) {
665 struct netlink_field
*f
= &set_flower_map
[type
][i
];
671 /* copy masked value */
672 for (j
= 0; j
< f
->size
; j
++) {
673 char maskval
= hasmask
? set_mask
[f
->offset
+ j
] : 0xFF;
675 key
[f
->flower_offset
+ j
] = maskval
& set_data
[f
->offset
+ j
];
676 mask
[f
->flower_offset
+ j
] = maskval
;
680 /* set its mask to 0 to show it's been used. */
682 memset(set_mask
+ f
->offset
, 0, f
->size
);
686 if (!is_all_zeros(&flower
->rewrite
, sizeof flower
->rewrite
)) {
687 if (flower
->rewrite
.rewrite
== false) {
688 flower
->rewrite
.rewrite
= true;
689 action
->type
= TC_ACT_PEDIT
;
690 flower
->action_count
++;
694 if (hasmask
&& !is_all_zeros(set_mask
, size
)) {
695 VLOG_DBG_RL(&rl
, "unsupported sub attribute of set action type %d",
697 ofpbuf_uninit(&set_buf
);
701 ofpbuf_uninit(&set_buf
);
706 parse_put_flow_set_action(struct tc_flower
*flower
, struct tc_action
*action
,
707 const struct nlattr
*set
, size_t set_len
)
709 const struct nlattr
*tunnel
;
710 const struct nlattr
*tun_attr
;
711 size_t tun_left
, tunnel_len
;
713 if (nl_attr_type(set
) != OVS_KEY_ATTR_TUNNEL
) {
714 return parse_put_flow_set_masked_action(flower
, action
, set
,
718 tunnel
= nl_attr_get(set
);
719 tunnel_len
= nl_attr_get_size(set
);
721 action
->type
= TC_ACT_ENCAP
;
722 flower
->action_count
++;
723 NL_ATTR_FOR_EACH_UNSAFE(tun_attr
, tun_left
, tunnel
, tunnel_len
) {
724 switch (nl_attr_type(tun_attr
)) {
725 case OVS_TUNNEL_KEY_ATTR_ID
: {
726 action
->encap
.id
= nl_attr_get_be64(tun_attr
);
729 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC
: {
730 action
->encap
.ipv4
.ipv4_src
= nl_attr_get_be32(tun_attr
);
733 case OVS_TUNNEL_KEY_ATTR_IPV4_DST
: {
734 action
->encap
.ipv4
.ipv4_dst
= nl_attr_get_be32(tun_attr
);
737 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC
: {
738 action
->encap
.ipv6
.ipv6_src
=
739 nl_attr_get_in6_addr(tun_attr
);
742 case OVS_TUNNEL_KEY_ATTR_IPV6_DST
: {
743 action
->encap
.ipv6
.ipv6_dst
=
744 nl_attr_get_in6_addr(tun_attr
);
747 case OVS_TUNNEL_KEY_ATTR_TP_SRC
: {
748 action
->encap
.tp_src
= nl_attr_get_be16(tun_attr
);
751 case OVS_TUNNEL_KEY_ATTR_TP_DST
: {
752 action
->encap
.tp_dst
= nl_attr_get_be16(tun_attr
);
762 test_key_and_mask(struct match
*match
)
764 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
765 const struct flow
*key
= &match
->flow
;
766 struct flow
*mask
= &match
->wc
.masks
;
768 if (mask
->pkt_mark
) {
769 VLOG_DBG_RL(&rl
, "offloading attribute pkt_mark isn't supported");
773 if (mask
->recirc_id
&& key
->recirc_id
) {
774 VLOG_DBG_RL(&rl
, "offloading attribute recirc_id isn't supported");
780 VLOG_DBG_RL(&rl
, "offloading attribute dp_hash isn't supported");
785 VLOG_DBG_RL(&rl
, "offloading attribute conj_id isn't supported");
789 if (mask
->skb_priority
) {
790 VLOG_DBG_RL(&rl
, "offloading attribute skb_priority isn't supported");
794 if (mask
->actset_output
) {
796 "offloading attribute actset_output isn't supported");
800 if (mask
->ct_state
) {
801 VLOG_DBG_RL(&rl
, "offloading attribute ct_state isn't supported");
806 VLOG_DBG_RL(&rl
, "offloading attribute ct_zone isn't supported");
811 VLOG_DBG_RL(&rl
, "offloading attribute ct_mark isn't supported");
815 if (mask
->packet_type
&& key
->packet_type
) {
816 VLOG_DBG_RL(&rl
, "offloading attribute packet_type isn't supported");
819 mask
->packet_type
= 0;
821 if (!ovs_u128_is_zero(mask
->ct_label
)) {
822 VLOG_DBG_RL(&rl
, "offloading attribute ct_label isn't supported");
826 for (int i
= 0; i
< FLOW_N_REGS
; i
++) {
829 "offloading attribute regs[%d] isn't supported", i
);
834 if (mask
->metadata
) {
835 VLOG_DBG_RL(&rl
, "offloading attribute metadata isn't supported");
840 VLOG_DBG_RL(&rl
, "offloading attribute nw_tos isn't supported");
844 for (int i
= 0; i
< FLOW_MAX_MPLS_LABELS
; i
++) {
845 if (mask
->mpls_lse
[i
]) {
846 VLOG_DBG_RL(&rl
, "offloading attribute mpls_lse isn't supported");
851 if (key
->dl_type
== htons(ETH_TYPE_IP
) &&
852 key
->nw_proto
== IPPROTO_ICMP
) {
855 "offloading attribute icmp_type isn't supported");
860 "offloading attribute icmp_code isn't supported");
863 } else if (key
->dl_type
== htons(ETH_TYPE_IP
) &&
864 key
->nw_proto
== IPPROTO_IGMP
) {
867 "offloading attribute igmp_type isn't supported");
872 "offloading attribute igmp_code isn't supported");
875 } else if (key
->dl_type
== htons(ETH_TYPE_IPV6
) &&
876 key
->nw_proto
== IPPROTO_ICMPV6
) {
879 "offloading attribute icmp_type isn't supported");
884 "offloading attribute icmp_code isn't supported");
889 if (!is_all_zeros(mask
, sizeof *mask
)) {
890 VLOG_DBG_RL(&rl
, "offloading isn't supported, unknown attribute");
898 netdev_tc_flow_put(struct netdev
*netdev
, struct match
*match
,
899 struct nlattr
*actions
, size_t actions_len
,
900 const ovs_u128
*ufid
, struct offload_info
*info
,
901 struct dpif_flow_stats
*stats OVS_UNUSED
)
903 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
904 struct tc_flower flower
;
905 const struct flow
*key
= &match
->flow
;
906 struct flow
*mask
= &match
->wc
.masks
;
907 const struct flow_tnl
*tnl
= &match
->flow
.tunnel
;
908 struct tc_action
*action
;
909 uint32_t block_id
= 0;
917 ifindex
= netdev_get_ifindex(netdev
);
919 VLOG_ERR_RL(&error_rl
, "flow_put: failed to get ifindex for %s: %s",
920 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
924 memset(&flower
, 0, sizeof flower
);
926 if (flow_tnl_dst_is_set(&key
->tunnel
)) {
928 "tunnel: id %#" PRIx64
" src " IP_FMT
929 " dst " IP_FMT
" tp_src %d tp_dst %d",
931 IP_ARGS(tnl
->ip_src
), IP_ARGS(tnl
->ip_dst
),
932 ntohs(tnl
->tp_src
), ntohs(tnl
->tp_dst
));
933 flower
.tunnel
.id
= tnl
->tun_id
;
934 flower
.tunnel
.ipv4
.ipv4_src
= tnl
->ip_src
;
935 flower
.tunnel
.ipv4
.ipv4_dst
= tnl
->ip_dst
;
936 flower
.tunnel
.ipv6
.ipv6_src
= tnl
->ipv6_src
;
937 flower
.tunnel
.ipv6
.ipv6_dst
= tnl
->ipv6_dst
;
938 flower
.tunnel
.tp_src
= tnl
->tp_src
;
939 flower
.tunnel
.tp_dst
= tnl
->tp_dst
;
940 flower
.tunnel
.tunnel
= true;
942 memset(&mask
->tunnel
, 0, sizeof mask
->tunnel
);
944 flower
.key
.eth_type
= key
->dl_type
;
945 flower
.mask
.eth_type
= mask
->dl_type
;
947 if (mask
->vlans
[0].tci
) {
948 ovs_be16 vid_mask
= mask
->vlans
[0].tci
& htons(VLAN_VID_MASK
);
949 ovs_be16 pcp_mask
= mask
->vlans
[0].tci
& htons(VLAN_PCP_MASK
);
950 ovs_be16 cfi
= mask
->vlans
[0].tci
& htons(VLAN_CFI
);
952 if (cfi
&& key
->vlans
[0].tci
& htons(VLAN_CFI
)
953 && (!vid_mask
|| vid_mask
== htons(VLAN_VID_MASK
))
954 && (!pcp_mask
|| pcp_mask
== htons(VLAN_PCP_MASK
))
955 && (vid_mask
|| pcp_mask
)) {
957 flower
.key
.vlan_id
= vlan_tci_to_vid(key
->vlans
[0].tci
);
958 VLOG_DBG_RL(&rl
, "vlan_id: %d\n", flower
.key
.vlan_id
);
961 flower
.key
.vlan_prio
= vlan_tci_to_pcp(key
->vlans
[0].tci
);
962 VLOG_DBG_RL(&rl
, "vlan_prio: %d\n", flower
.key
.vlan_prio
);
964 flower
.key
.encap_eth_type
= flower
.key
.eth_type
;
965 flower
.key
.eth_type
= key
->vlans
[0].tpid
;
966 } else if (mask
->vlans
[0].tci
== htons(0xffff) &&
967 ntohs(key
->vlans
[0].tci
) == 0) {
968 /* exact && no vlan */
973 } else if (mask
->vlans
[1].tci
) {
976 memset(mask
->vlans
, 0, sizeof mask
->vlans
);
978 flower
.key
.dst_mac
= key
->dl_dst
;
979 flower
.mask
.dst_mac
= mask
->dl_dst
;
980 flower
.key
.src_mac
= key
->dl_src
;
981 flower
.mask
.src_mac
= mask
->dl_src
;
982 memset(&mask
->dl_dst
, 0, sizeof mask
->dl_dst
);
983 memset(&mask
->dl_src
, 0, sizeof mask
->dl_src
);
985 mask
->in_port
.odp_port
= 0;
987 if (is_ip_any(key
)) {
988 flower
.key
.ip_proto
= key
->nw_proto
;
989 flower
.mask
.ip_proto
= mask
->nw_proto
;
990 flower
.key
.ip_ttl
= key
->nw_ttl
;
991 flower
.mask
.ip_ttl
= mask
->nw_ttl
;
993 if (mask
->nw_frag
& FLOW_NW_FRAG_ANY
) {
994 flower
.mask
.flags
|= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
;
996 if (key
->nw_frag
& FLOW_NW_FRAG_ANY
) {
997 flower
.key
.flags
|= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
;
999 if (mask
->nw_frag
& FLOW_NW_FRAG_LATER
) {
1000 flower
.mask
.flags
|= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
;
1002 if (!(key
->nw_frag
& FLOW_NW_FRAG_LATER
)) {
1003 flower
.key
.flags
|= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
;
1011 if (key
->nw_proto
== IPPROTO_TCP
) {
1012 flower
.key
.tcp_dst
= key
->tp_dst
;
1013 flower
.mask
.tcp_dst
= mask
->tp_dst
;
1014 flower
.key
.tcp_src
= key
->tp_src
;
1015 flower
.mask
.tcp_src
= mask
->tp_src
;
1016 flower
.key
.tcp_flags
= key
->tcp_flags
;
1017 flower
.mask
.tcp_flags
= mask
->tcp_flags
;
1020 mask
->tcp_flags
= 0;
1021 } else if (key
->nw_proto
== IPPROTO_UDP
) {
1022 flower
.key
.udp_dst
= key
->tp_dst
;
1023 flower
.mask
.udp_dst
= mask
->tp_dst
;
1024 flower
.key
.udp_src
= key
->tp_src
;
1025 flower
.mask
.udp_src
= mask
->tp_src
;
1028 } else if (key
->nw_proto
== IPPROTO_SCTP
) {
1029 flower
.key
.sctp_dst
= key
->tp_dst
;
1030 flower
.mask
.sctp_dst
= mask
->tp_dst
;
1031 flower
.key
.sctp_src
= key
->tp_src
;
1032 flower
.mask
.sctp_src
= mask
->tp_src
;
1041 if (key
->dl_type
== htons(ETH_P_IP
)) {
1042 flower
.key
.ipv4
.ipv4_src
= key
->nw_src
;
1043 flower
.mask
.ipv4
.ipv4_src
= mask
->nw_src
;
1044 flower
.key
.ipv4
.ipv4_dst
= key
->nw_dst
;
1045 flower
.mask
.ipv4
.ipv4_dst
= mask
->nw_dst
;
1048 } else if (key
->dl_type
== htons(ETH_P_IPV6
)) {
1049 flower
.key
.ipv6
.ipv6_src
= key
->ipv6_src
;
1050 flower
.mask
.ipv6
.ipv6_src
= mask
->ipv6_src
;
1051 flower
.key
.ipv6
.ipv6_dst
= key
->ipv6_dst
;
1052 flower
.mask
.ipv6
.ipv6_dst
= mask
->ipv6_dst
;
1053 memset(&mask
->ipv6_src
, 0, sizeof mask
->ipv6_src
);
1054 memset(&mask
->ipv6_dst
, 0, sizeof mask
->ipv6_dst
);
1058 err
= test_key_and_mask(match
);
1063 NL_ATTR_FOR_EACH(nla
, left
, actions
, actions_len
) {
1064 if (flower
.action_count
>= TCA_ACT_MAX_PRIO
) {
1065 VLOG_DBG_RL(&rl
, "Can only support %d actions", flower
.action_count
);
1068 action
= &flower
.actions
[flower
.action_count
];
1069 if (nl_attr_type(nla
) == OVS_ACTION_ATTR_OUTPUT
) {
1070 odp_port_t port
= nl_attr_get_odp_port(nla
);
1071 struct netdev
*outdev
= netdev_ports_get(port
, info
->dpif_class
);
1073 action
->ifindex_out
= netdev_get_ifindex(outdev
);
1074 action
->type
= TC_ACT_OUTPUT
;
1075 flower
.action_count
++;
1076 netdev_close(outdev
);
1077 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_PUSH_VLAN
) {
1078 const struct ovs_action_push_vlan
*vlan_push
= nl_attr_get(nla
);
1080 action
->vlan
.vlan_push_tpid
= vlan_push
->vlan_tpid
;
1081 action
->vlan
.vlan_push_id
= vlan_tci_to_vid(vlan_push
->vlan_tci
);
1082 action
->vlan
.vlan_push_prio
= vlan_tci_to_pcp(vlan_push
->vlan_tci
);
1083 action
->type
= TC_ACT_VLAN_PUSH
;
1084 flower
.action_count
++;
1085 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_POP_VLAN
) {
1086 action
->type
= TC_ACT_VLAN_POP
;
1087 flower
.action_count
++;
1088 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_SET
) {
1089 const struct nlattr
*set
= nl_attr_get(nla
);
1090 const size_t set_len
= nl_attr_get_size(nla
);
1092 err
= parse_put_flow_set_action(&flower
, action
, set
, set_len
);
1096 if (action
->type
== TC_ACT_ENCAP
) {
1097 action
->encap
.tp_dst
= info
->tp_dst_port
;
1099 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_SET_MASKED
) {
1100 const struct nlattr
*set
= nl_attr_get(nla
);
1101 const size_t set_len
= nl_attr_get_size(nla
);
1103 err
= parse_put_flow_set_masked_action(&flower
, action
, set
,
1109 VLOG_DBG_RL(&rl
, "unsupported put action type: %d",
1115 block_id
= get_block_id_from_netdev(netdev
);
1116 handle
= get_ufid_tc_mapping(ufid
, &prio
, NULL
);
1117 if (handle
&& prio
) {
1118 VLOG_DBG_RL(&rl
, "updating old handle: %d prio: %d", handle
, prio
);
1119 tc_del_filter(ifindex
, prio
, handle
, block_id
);
1123 prio
= get_prio_for_tc_flower(&flower
);
1125 VLOG_ERR_RL(&rl
, "couldn't get tc prio: %s", ovs_strerror(ENOSPC
));
1130 flower
.act_cookie
.data
= ufid
;
1131 flower
.act_cookie
.len
= sizeof *ufid
;
1133 err
= tc_replace_flower(ifindex
, prio
, handle
, &flower
, block_id
);
1135 add_ufid_tc_mapping(ufid
, flower
.prio
, flower
.handle
, netdev
, ifindex
);
1142 netdev_tc_flow_get(struct netdev
*netdev OVS_UNUSED
,
1143 struct match
*match
,
1144 struct nlattr
**actions
,
1145 const ovs_u128
*ufid
,
1146 struct dpif_flow_stats
*stats
,
1147 struct dpif_flow_attrs
*attrs
,
1150 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
1152 struct tc_flower flower
;
1153 uint32_t block_id
= 0;
1160 handle
= get_ufid_tc_mapping(ufid
, &prio
, &dev
);
1165 ifindex
= netdev_get_ifindex(dev
);
1167 VLOG_ERR_RL(&error_rl
, "flow_get: failed to get ifindex for %s: %s",
1168 netdev_get_name(dev
), ovs_strerror(-ifindex
));
1173 VLOG_DBG_RL(&rl
, "flow get (dev %s prio %d handle %d)",
1174 netdev_get_name(dev
), prio
, handle
);
1175 block_id
= get_block_id_from_netdev(netdev
);
1176 err
= tc_get_flower(ifindex
, prio
, handle
, &flower
, block_id
);
1179 VLOG_ERR_RL(&error_rl
, "flow get failed (dev %s prio %d handle %d): %s",
1180 netdev_get_name(dev
), prio
, handle
, ovs_strerror(err
));
1184 in_port
= netdev_ifindex_to_odp_port(ifindex
);
1185 parse_tc_flower_to_match(&flower
, match
, actions
, stats
, attrs
, buf
);
1187 match
->wc
.masks
.in_port
.odp_port
= u32_to_odp(UINT32_MAX
);
1188 match
->flow
.in_port
.odp_port
= in_port
;
1194 netdev_tc_flow_del(struct netdev
*netdev OVS_UNUSED
,
1195 const ovs_u128
*ufid
,
1196 struct dpif_flow_stats
*stats
)
1198 struct tc_flower flower
;
1199 uint32_t block_id
= 0;
1206 handle
= get_ufid_tc_mapping(ufid
, &prio
, &dev
);
1211 ifindex
= netdev_get_ifindex(dev
);
1213 VLOG_ERR_RL(&error_rl
, "flow_del: failed to get ifindex for %s: %s",
1214 netdev_get_name(dev
), ovs_strerror(-ifindex
));
1219 block_id
= get_block_id_from_netdev(netdev
);
1222 memset(stats
, 0, sizeof *stats
);
1223 if (!tc_get_flower(ifindex
, prio
, handle
, &flower
, block_id
)) {
1224 stats
->n_packets
= get_32aligned_u64(&flower
.stats
.n_packets
);
1225 stats
->n_bytes
= get_32aligned_u64(&flower
.stats
.n_bytes
);
1226 stats
->used
= flower
.lastused
;
1230 error
= tc_del_filter(ifindex
, prio
, handle
, block_id
);
1231 del_ufid_tc_mapping(ufid
);
1239 probe_multi_mask_per_prio(int ifindex
)
1241 struct tc_flower flower
;
1245 error
= tc_add_del_ingress_qdisc(ifindex
, true, block_id
);
1250 memset(&flower
, 0, sizeof flower
);
1252 flower
.key
.eth_type
= htons(ETH_P_IP
);
1253 flower
.mask
.eth_type
= OVS_BE16_MAX
;
1254 memset(&flower
.key
.dst_mac
, 0x11, sizeof flower
.key
.dst_mac
);
1255 memset(&flower
.mask
.dst_mac
, 0xff, sizeof flower
.mask
.dst_mac
);
1257 error
= tc_replace_flower(ifindex
, 1, 1, &flower
, block_id
);
1262 memset(&flower
.key
.src_mac
, 0x11, sizeof flower
.key
.src_mac
);
1263 memset(&flower
.mask
.src_mac
, 0xff, sizeof flower
.mask
.src_mac
);
1265 error
= tc_replace_flower(ifindex
, 1, 2, &flower
, block_id
);
1266 tc_del_filter(ifindex
, 1, 1, block_id
);
1272 tc_del_filter(ifindex
, 1, 2, block_id
);
1274 multi_mask_per_prio
= true;
1275 VLOG_INFO("probe tc: multiple masks on single tc prio is supported.");
1278 tc_add_del_ingress_qdisc(ifindex
, false, block_id
);
1282 probe_tc_block_support(int ifindex
)
1284 uint32_t block_id
= 1;
1287 error
= tc_add_del_ingress_qdisc(ifindex
, true, block_id
);
1292 tc_add_del_ingress_qdisc(ifindex
, false, block_id
);
1294 block_support
= true;
1295 VLOG_INFO("probe tc: block offload is supported.");
1299 netdev_tc_init_flow_api(struct netdev
*netdev
)
1301 static struct ovsthread_once multi_mask_once
= OVSTHREAD_ONCE_INITIALIZER
;
1302 static struct ovsthread_once block_once
= OVSTHREAD_ONCE_INITIALIZER
;
1303 uint32_t block_id
= 0;
1307 ifindex
= netdev_get_ifindex(netdev
);
1309 VLOG_ERR_RL(&error_rl
, "init: failed to get ifindex for %s: %s",
1310 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
1314 if (ovsthread_once_start(&block_once
)) {
1315 probe_tc_block_support(ifindex
);
1316 ovsthread_once_done(&block_once
);
1319 if (ovsthread_once_start(&multi_mask_once
)) {
1320 probe_multi_mask_per_prio(ifindex
);
1321 ovsthread_once_done(&multi_mask_once
);
1324 block_id
= get_block_id_from_netdev(netdev
);
1325 error
= tc_add_del_ingress_qdisc(ifindex
, true, block_id
);
1327 if (error
&& error
!= EEXIST
) {
1328 VLOG_ERR("failed adding ingress qdisc required for offloading: %s",
1329 ovs_strerror(error
));
1333 VLOG_INFO("added ingress qdisc to %s", netdev_get_name(netdev
));