2 * Copyright (c) 2016 Mellanox Technologies, Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "netdev-tc-offloads.h"
21 #include <linux/if_ether.h>
25 #include "openvswitch/hmap.h"
26 #include "openvswitch/match.h"
27 #include "openvswitch/ofpbuf.h"
28 #include "openvswitch/thread.h"
29 #include "openvswitch/types.h"
30 #include "openvswitch/util.h"
31 #include "openvswitch/vlog.h"
32 #include "netdev-linux.h"
34 #include "netlink-socket.h"
35 #include "odp-netlink.h"
38 #include "unaligned.h"
41 VLOG_DEFINE_THIS_MODULE(netdev_tc_offloads
);
43 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(60, 5);
45 static struct hmap ufid_tc
= HMAP_INITIALIZER(&ufid_tc
);
46 static bool multi_mask_per_prio
= false;
48 struct netlink_field
{
54 static struct netlink_field set_flower_map
[][3] = {
55 [OVS_KEY_ATTR_IPV4
] = {
56 { offsetof(struct ovs_key_ipv4
, ipv4_src
),
57 offsetof(struct tc_flower_key
, ipv4
.ipv4_src
),
58 MEMBER_SIZEOF(struct tc_flower_key
, ipv4
.ipv4_src
)
60 { offsetof(struct ovs_key_ipv4
, ipv4_dst
),
61 offsetof(struct tc_flower_key
, ipv4
.ipv4_dst
),
62 MEMBER_SIZEOF(struct tc_flower_key
, ipv4
.ipv4_dst
)
64 { offsetof(struct ovs_key_ipv4
, ipv4_ttl
),
65 offsetof(struct tc_flower_key
, ipv4
.rewrite_ttl
),
66 MEMBER_SIZEOF(struct tc_flower_key
, ipv4
.rewrite_ttl
)
69 [OVS_KEY_ATTR_IPV6
] = {
70 { offsetof(struct ovs_key_ipv6
, ipv6_src
),
71 offsetof(struct tc_flower_key
, ipv6
.ipv6_src
),
72 MEMBER_SIZEOF(struct tc_flower_key
, ipv6
.ipv6_src
)
74 { offsetof(struct ovs_key_ipv6
, ipv6_dst
),
75 offsetof(struct tc_flower_key
, ipv6
.ipv6_dst
),
76 MEMBER_SIZEOF(struct tc_flower_key
, ipv6
.ipv6_dst
)
79 [OVS_KEY_ATTR_ETHERNET
] = {
80 { offsetof(struct ovs_key_ethernet
, eth_src
),
81 offsetof(struct tc_flower_key
, src_mac
),
82 MEMBER_SIZEOF(struct tc_flower_key
, src_mac
)
84 { offsetof(struct ovs_key_ethernet
, eth_dst
),
85 offsetof(struct tc_flower_key
, dst_mac
),
86 MEMBER_SIZEOF(struct tc_flower_key
, dst_mac
)
89 [OVS_KEY_ATTR_ETHERTYPE
] = {
91 offsetof(struct tc_flower_key
, eth_type
),
92 MEMBER_SIZEOF(struct tc_flower_key
, eth_type
)
95 [OVS_KEY_ATTR_TCP
] = {
96 { offsetof(struct ovs_key_tcp
, tcp_src
),
97 offsetof(struct tc_flower_key
, tcp_src
),
98 MEMBER_SIZEOF(struct tc_flower_key
, tcp_src
)
100 { offsetof(struct ovs_key_tcp
, tcp_dst
),
101 offsetof(struct tc_flower_key
, tcp_dst
),
102 MEMBER_SIZEOF(struct tc_flower_key
, tcp_dst
)
105 [OVS_KEY_ATTR_UDP
] = {
106 { offsetof(struct ovs_key_udp
, udp_src
),
107 offsetof(struct tc_flower_key
, udp_src
),
108 MEMBER_SIZEOF(struct tc_flower_key
, udp_src
)
110 { offsetof(struct ovs_key_udp
, udp_dst
),
111 offsetof(struct tc_flower_key
, udp_dst
),
112 MEMBER_SIZEOF(struct tc_flower_key
, udp_dst
)
117 static struct ovs_mutex ufid_lock
= OVS_MUTEX_INITIALIZER
;
120 * struct ufid_tc_data - data entry for ufid_tc hmap.
121 * @ufid_node: Element in @ufid_tc hash table by ufid key.
122 * @tc_node: Element in @ufid_tc hash table by prio/handle/ifindex key.
123 * @ufid: ufid assigned to the flow
126 * @ifindex: netdev ifindex.
127 * @netdev: netdev associated with the tc rule
129 struct ufid_tc_data
{
130 struct hmap_node ufid_node
;
131 struct hmap_node tc_node
;
136 struct netdev
*netdev
;
139 /* Remove matching ufid entry from ufid_tc hashmap. */
141 del_ufid_tc_mapping(const ovs_u128
*ufid
)
143 size_t ufid_hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
144 struct ufid_tc_data
*data
;
146 ovs_mutex_lock(&ufid_lock
);
147 HMAP_FOR_EACH_WITH_HASH(data
, ufid_node
, ufid_hash
, &ufid_tc
) {
148 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
154 ovs_mutex_unlock(&ufid_lock
);
158 hmap_remove(&ufid_tc
, &data
->ufid_node
);
159 hmap_remove(&ufid_tc
, &data
->tc_node
);
160 netdev_close(data
->netdev
);
162 ovs_mutex_unlock(&ufid_lock
);
165 /* Add ufid entry to ufid_tc hashmap.
166 * If entry exists already it will be replaced. */
168 add_ufid_tc_mapping(const ovs_u128
*ufid
, int prio
, int handle
,
169 struct netdev
*netdev
, int ifindex
)
171 size_t ufid_hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
172 size_t tc_hash
= hash_int(hash_int(prio
, handle
), ifindex
);
173 struct ufid_tc_data
*new_data
= xzalloc(sizeof *new_data
);
175 del_ufid_tc_mapping(ufid
);
177 new_data
->ufid
= *ufid
;
178 new_data
->prio
= prio
;
179 new_data
->handle
= handle
;
180 new_data
->netdev
= netdev_ref(netdev
);
181 new_data
->ifindex
= ifindex
;
183 ovs_mutex_lock(&ufid_lock
);
184 hmap_insert(&ufid_tc
, &new_data
->ufid_node
, ufid_hash
);
185 hmap_insert(&ufid_tc
, &new_data
->tc_node
, tc_hash
);
186 ovs_mutex_unlock(&ufid_lock
);
189 /* Get ufid from ufid_tc hashmap.
191 * If netdev output param is not NULL then the function will return
192 * associated netdev on success and a refcount is taken on that netdev.
193 * The caller is then responsible to close the netdev.
195 * Returns handle if successful and fill prio and netdev for that ufid.
196 * Otherwise returns 0.
199 get_ufid_tc_mapping(const ovs_u128
*ufid
, int *prio
, struct netdev
**netdev
)
201 size_t ufid_hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
202 struct ufid_tc_data
*data
;
205 ovs_mutex_lock(&ufid_lock
);
206 HMAP_FOR_EACH_WITH_HASH(data
, ufid_node
, ufid_hash
, &ufid_tc
) {
207 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
212 *netdev
= netdev_ref(data
->netdev
);
214 handle
= data
->handle
;
218 ovs_mutex_unlock(&ufid_lock
);
223 /* Find ufid entry in ufid_tc hashmap using prio, handle and netdev.
224 * The result is saved in ufid.
226 * Returns true on success.
229 find_ufid(int prio
, int handle
, struct netdev
*netdev
, ovs_u128
*ufid
)
231 int ifindex
= netdev_get_ifindex(netdev
);
232 struct ufid_tc_data
*data
;
233 size_t tc_hash
= hash_int(hash_int(prio
, handle
), ifindex
);
235 ovs_mutex_lock(&ufid_lock
);
236 HMAP_FOR_EACH_WITH_HASH(data
, tc_node
, tc_hash
, &ufid_tc
) {
237 if (data
->prio
== prio
&& data
->handle
== handle
238 && data
->ifindex
== ifindex
) {
243 ovs_mutex_unlock(&ufid_lock
);
245 return (data
!= NULL
);
248 struct prio_map_data
{
249 struct hmap_node node
;
250 struct tc_flower_key mask
;
255 /* Get free prio for tc flower
256 * If prio is already allocated for mask/eth_type combination then return it.
257 * If not assign new prio.
259 * Return prio on success or 0 if we are out of prios.
262 get_prio_for_tc_flower(struct tc_flower
*flower
)
264 static struct hmap prios
= HMAP_INITIALIZER(&prios
);
265 static struct ovs_mutex prios_lock
= OVS_MUTEX_INITIALIZER
;
266 static uint16_t last_prio
= 0;
267 size_t key_len
= sizeof(struct tc_flower_key
);
268 size_t hash
= hash_int((OVS_FORCE
uint32_t) flower
->key
.eth_type
, 0);
269 struct prio_map_data
*data
;
270 struct prio_map_data
*new_data
;
272 if (!multi_mask_per_prio
) {
273 hash
= hash_bytes(&flower
->mask
, key_len
, hash
);
276 /* We can use the same prio for same mask/eth combination but must have
277 * different prio if not. Flower classifier will reject same prio for
278 * different mask combination unless multi mask per prio is supported. */
279 ovs_mutex_lock(&prios_lock
);
280 HMAP_FOR_EACH_WITH_HASH(data
, node
, hash
, &prios
) {
281 if ((multi_mask_per_prio
282 || !memcmp(&flower
->mask
, &data
->mask
, key_len
))
283 && data
->protocol
== flower
->key
.eth_type
) {
284 ovs_mutex_unlock(&prios_lock
);
289 if (last_prio
== UINT16_MAX
) {
290 /* last_prio can overflow if there will be many different kinds of
291 * flows which shouldn't happen organically. */
292 ovs_mutex_unlock(&prios_lock
);
296 new_data
= xzalloc(sizeof *new_data
);
297 memcpy(&new_data
->mask
, &flower
->mask
, key_len
);
298 new_data
->prio
= ++last_prio
;
299 new_data
->protocol
= flower
->key
.eth_type
;
300 hmap_insert(&prios
, &new_data
->node
, hash
);
301 ovs_mutex_unlock(&prios_lock
);
303 return new_data
->prio
;
307 netdev_tc_flow_flush(struct netdev
*netdev
)
309 int ifindex
= netdev_get_ifindex(netdev
);
312 VLOG_ERR_RL(&error_rl
, "flow_flush: failed to get ifindex for %s: %s",
313 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
317 return tc_flush(ifindex
);
321 netdev_tc_flow_dump_create(struct netdev
*netdev
,
322 struct netdev_flow_dump
**dump_out
)
324 struct netdev_flow_dump
*dump
;
327 ifindex
= netdev_get_ifindex(netdev
);
329 VLOG_ERR_RL(&error_rl
, "dump_create: failed to get ifindex for %s: %s",
330 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
334 dump
= xzalloc(sizeof *dump
);
335 dump
->nl_dump
= xzalloc(sizeof *dump
->nl_dump
);
336 dump
->netdev
= netdev_ref(netdev
);
337 tc_dump_flower_start(ifindex
, dump
->nl_dump
);
345 netdev_tc_flow_dump_destroy(struct netdev_flow_dump
*dump
)
347 nl_dump_done(dump
->nl_dump
);
348 netdev_close(dump
->netdev
);
355 parse_flower_rewrite_to_netlink_action(struct ofpbuf
*buf
,
356 struct tc_flower
*flower
)
358 char *mask
= (char *) &flower
->rewrite
.mask
;
359 char *data
= (char *) &flower
->rewrite
.key
;
361 for (int type
= 0; type
< ARRAY_SIZE(set_flower_map
); type
++) {
364 int len
= ovs_flow_key_attr_lens
[type
].len
;
370 for (int j
= 0; j
< ARRAY_SIZE(set_flower_map
[type
]); j
++) {
371 struct netlink_field
*f
= &set_flower_map
[type
][j
];
377 if (!is_all_zeros(mask
+ f
->flower_offset
, f
->size
)) {
379 nested
= nl_msg_start_nested(buf
,
380 OVS_ACTION_ATTR_SET_MASKED
);
381 put
= nl_msg_put_unspec_zero(buf
, type
, len
* 2);
384 memcpy(put
+ f
->offset
, data
+ f
->flower_offset
, f
->size
);
385 memcpy(put
+ len
+ f
->offset
,
386 mask
+ f
->flower_offset
, f
->size
);
391 nl_msg_end_nested(buf
, nested
);
397 parse_tc_flower_to_match(struct tc_flower
*flower
,
399 struct nlattr
**actions
,
400 struct dpif_flow_stats
*stats
,
404 struct tc_flower_key
*key
= &flower
->key
;
405 struct tc_flower_key
*mask
= &flower
->mask
;
406 odp_port_t outport
= 0;
407 struct tc_action
*action
;
412 match_init_catchall(match
);
413 match_set_dl_src_masked(match
, key
->src_mac
, mask
->src_mac
);
414 match_set_dl_dst_masked(match
, key
->dst_mac
, mask
->dst_mac
);
416 if (key
->eth_type
== htons(ETH_TYPE_VLAN
)) {
417 match_set_dl_vlan(match
, htons(key
->vlan_id
));
418 match_set_dl_vlan_pcp(match
, key
->vlan_prio
);
419 match_set_dl_type(match
, key
->encap_eth_type
);
420 flow_fix_vlan_tpid(&match
->flow
);
422 match_set_dl_type(match
, key
->eth_type
);
425 if (is_ip_any(&match
->flow
)) {
427 match_set_nw_proto(match
, key
->ip_proto
);
430 match_set_nw_ttl_masked(match
, key
->ip_ttl
, mask
->ip_ttl
);
434 uint8_t flags_mask
= 0;
436 if (mask
->flags
& TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
) {
437 if (key
->flags
& TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
) {
438 flags
|= FLOW_NW_FRAG_ANY
;
440 flags_mask
|= FLOW_NW_FRAG_ANY
;
443 if (mask
->flags
& TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
) {
444 if (!(key
->flags
& TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
)) {
445 flags
|= FLOW_NW_FRAG_LATER
;
447 flags_mask
|= FLOW_NW_FRAG_LATER
;
450 match_set_nw_frag_masked(match
, flags
, flags_mask
);
453 match_set_nw_src_masked(match
, key
->ipv4
.ipv4_src
, mask
->ipv4
.ipv4_src
);
454 match_set_nw_dst_masked(match
, key
->ipv4
.ipv4_dst
, mask
->ipv4
.ipv4_dst
);
456 match_set_ipv6_src_masked(match
,
457 &key
->ipv6
.ipv6_src
, &mask
->ipv6
.ipv6_src
);
458 match_set_ipv6_dst_masked(match
,
459 &key
->ipv6
.ipv6_dst
, &mask
->ipv6
.ipv6_dst
);
461 if (key
->ip_proto
== IPPROTO_TCP
) {
462 match_set_tp_dst_masked(match
, key
->tcp_dst
, mask
->tcp_dst
);
463 match_set_tp_src_masked(match
, key
->tcp_src
, mask
->tcp_src
);
464 match_set_tcp_flags_masked(match
, key
->tcp_flags
, mask
->tcp_flags
);
465 } else if (key
->ip_proto
== IPPROTO_UDP
) {
466 match_set_tp_dst_masked(match
, key
->udp_dst
, mask
->udp_dst
);
467 match_set_tp_src_masked(match
, key
->udp_src
, mask
->udp_src
);
468 } else if (key
->ip_proto
== IPPROTO_SCTP
) {
469 match_set_tp_dst_masked(match
, key
->sctp_dst
, mask
->sctp_dst
);
470 match_set_tp_src_masked(match
, key
->sctp_src
, mask
->sctp_src
);
474 if (flower
->tunnel
.tunnel
) {
475 match_set_tun_id(match
, flower
->tunnel
.id
);
476 if (flower
->tunnel
.ipv4
.ipv4_dst
) {
477 match_set_tun_src(match
, flower
->tunnel
.ipv4
.ipv4_src
);
478 match_set_tun_dst(match
, flower
->tunnel
.ipv4
.ipv4_dst
);
479 } else if (!is_all_zeros(&flower
->tunnel
.ipv6
.ipv6_dst
,
480 sizeof flower
->tunnel
.ipv6
.ipv6_dst
)) {
481 match_set_tun_ipv6_src(match
, &flower
->tunnel
.ipv6
.ipv6_src
);
482 match_set_tun_ipv6_dst(match
, &flower
->tunnel
.ipv6
.ipv6_dst
);
484 if (flower
->tunnel
.tp_dst
) {
485 match_set_tun_tp_dst(match
, flower
->tunnel
.tp_dst
);
489 act_off
= nl_msg_start_nested(buf
, OVS_FLOW_ATTR_ACTIONS
);
491 action
= flower
->actions
;
492 for (i
= 0; i
< flower
->action_count
; i
++, action
++) {
493 switch (action
->type
) {
494 case TC_ACT_VLAN_POP
: {
495 nl_msg_put_flag(buf
, OVS_ACTION_ATTR_POP_VLAN
);
498 case TC_ACT_VLAN_PUSH
: {
499 struct ovs_action_push_vlan
*push
;
501 push
= nl_msg_put_unspec_zero(buf
, OVS_ACTION_ATTR_PUSH_VLAN
,
503 push
->vlan_tpid
= htons(ETH_TYPE_VLAN
);
504 push
->vlan_tci
= htons(action
->vlan
.vlan_push_id
505 | (action
->vlan
.vlan_push_prio
<< 13)
510 parse_flower_rewrite_to_netlink_action(buf
, flower
);
514 size_t set_offset
= nl_msg_start_nested(buf
, OVS_ACTION_ATTR_SET
);
515 size_t tunnel_offset
=
516 nl_msg_start_nested(buf
, OVS_KEY_ATTR_TUNNEL
);
518 nl_msg_put_be64(buf
, OVS_TUNNEL_KEY_ATTR_ID
, action
->encap
.id
);
519 if (action
->encap
.ipv4
.ipv4_src
) {
520 nl_msg_put_be32(buf
, OVS_TUNNEL_KEY_ATTR_IPV4_SRC
,
521 action
->encap
.ipv4
.ipv4_src
);
523 if (action
->encap
.ipv4
.ipv4_dst
) {
524 nl_msg_put_be32(buf
, OVS_TUNNEL_KEY_ATTR_IPV4_DST
,
525 action
->encap
.ipv4
.ipv4_dst
);
527 if (!is_all_zeros(&action
->encap
.ipv6
.ipv6_src
,
528 sizeof action
->encap
.ipv6
.ipv6_src
)) {
529 nl_msg_put_in6_addr(buf
, OVS_TUNNEL_KEY_ATTR_IPV6_SRC
,
530 &action
->encap
.ipv6
.ipv6_src
);
532 if (!is_all_zeros(&action
->encap
.ipv6
.ipv6_dst
,
533 sizeof action
->encap
.ipv6
.ipv6_dst
)) {
534 nl_msg_put_in6_addr(buf
, OVS_TUNNEL_KEY_ATTR_IPV6_DST
,
535 &action
->encap
.ipv6
.ipv6_dst
);
537 nl_msg_put_be16(buf
, OVS_TUNNEL_KEY_ATTR_TP_DST
,
538 action
->encap
.tp_dst
);
540 nl_msg_end_nested(buf
, tunnel_offset
);
541 nl_msg_end_nested(buf
, set_offset
);
544 case TC_ACT_OUTPUT
: {
545 if (action
->ifindex_out
) {
546 outport
= netdev_ifindex_to_odp_port(action
->ifindex_out
);
551 nl_msg_put_u32(buf
, OVS_ACTION_ATTR_OUTPUT
, odp_to_u32(outport
));
557 nl_msg_end_nested(buf
, act_off
);
559 *actions
= ofpbuf_at_assert(buf
, act_off
, sizeof(struct nlattr
));
562 memset(stats
, 0, sizeof *stats
);
563 stats
->n_packets
= get_32aligned_u64(&flower
->stats
.n_packets
);
564 stats
->n_bytes
= get_32aligned_u64(&flower
->stats
.n_bytes
);
565 stats
->used
= flower
->lastused
;
572 netdev_tc_flow_dump_next(struct netdev_flow_dump
*dump
,
574 struct nlattr
**actions
,
575 struct dpif_flow_stats
*stats
,
577 struct ofpbuf
*rbuffer
,
578 struct ofpbuf
*wbuffer
)
580 struct ofpbuf nl_flow
;
582 while (nl_dump_next(dump
->nl_dump
, &nl_flow
, rbuffer
)) {
583 struct tc_flower flower
;
584 struct netdev
*netdev
= dump
->netdev
;
586 if (parse_netlink_to_tc_flower(&nl_flow
, &flower
)) {
590 if (parse_tc_flower_to_match(&flower
, match
, actions
, stats
,
595 if (flower
.act_cookie
.len
) {
596 *ufid
= *((ovs_u128
*) flower
.act_cookie
.data
);
597 } else if (!find_ufid(flower
.prio
, flower
.handle
, netdev
, ufid
)) {
601 match
->wc
.masks
.in_port
.odp_port
= u32_to_odp(UINT32_MAX
);
602 match
->flow
.in_port
.odp_port
= dump
->port
;
611 parse_put_flow_set_masked_action(struct tc_flower
*flower
,
612 struct tc_action
*action
,
613 const struct nlattr
*set
,
617 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
618 uint64_t set_stub
[1024 / 8];
619 struct ofpbuf set_buf
= OFPBUF_STUB_INITIALIZER(set_stub
);
620 char *set_data
, *set_mask
;
621 char *key
= (char *) &flower
->rewrite
.key
;
622 char *mask
= (char *) &flower
->rewrite
.mask
;
623 const struct nlattr
*attr
;
627 /* copy so we can set attr mask to 0 for used ovs key struct members */
628 attr
= ofpbuf_put(&set_buf
, set
, set_len
);
630 type
= nl_attr_type(attr
);
631 size
= nl_attr_get_size(attr
) / 2;
632 set_data
= CONST_CAST(char *, nl_attr_get(attr
));
633 set_mask
= set_data
+ size
;
635 if (type
>= ARRAY_SIZE(set_flower_map
)
636 || !set_flower_map
[type
][0].size
) {
637 VLOG_DBG_RL(&rl
, "unsupported set action type: %d", type
);
638 ofpbuf_uninit(&set_buf
);
642 for (i
= 0; i
< ARRAY_SIZE(set_flower_map
[type
]); i
++) {
643 struct netlink_field
*f
= &set_flower_map
[type
][i
];
649 /* copy masked value */
650 for (j
= 0; j
< f
->size
; j
++) {
651 char maskval
= hasmask
? set_mask
[f
->offset
+ j
] : 0xFF;
653 key
[f
->flower_offset
+ j
] = maskval
& set_data
[f
->offset
+ j
];
654 mask
[f
->flower_offset
+ j
] = maskval
;
658 /* set its mask to 0 to show it's been used. */
660 memset(set_mask
+ f
->offset
, 0, f
->size
);
664 if (!is_all_zeros(&flower
->rewrite
, sizeof flower
->rewrite
)) {
665 if (flower
->rewrite
.rewrite
== false) {
666 flower
->rewrite
.rewrite
= true;
667 action
->type
= TC_ACT_PEDIT
;
668 flower
->action_count
++;
672 if (hasmask
&& !is_all_zeros(set_mask
, size
)) {
673 VLOG_DBG_RL(&rl
, "unsupported sub attribute of set action type %d",
675 ofpbuf_uninit(&set_buf
);
679 ofpbuf_uninit(&set_buf
);
684 parse_put_flow_set_action(struct tc_flower
*flower
, struct tc_action
*action
,
685 const struct nlattr
*set
, size_t set_len
)
687 const struct nlattr
*tunnel
;
688 const struct nlattr
*tun_attr
;
689 size_t tun_left
, tunnel_len
;
691 if (nl_attr_type(set
) != OVS_KEY_ATTR_TUNNEL
) {
692 return parse_put_flow_set_masked_action(flower
, action
, set
,
696 tunnel
= nl_attr_get(set
);
697 tunnel_len
= nl_attr_get_size(set
);
699 action
->type
= TC_ACT_ENCAP
;
700 flower
->action_count
++;
701 NL_ATTR_FOR_EACH_UNSAFE(tun_attr
, tun_left
, tunnel
, tunnel_len
) {
702 switch (nl_attr_type(tun_attr
)) {
703 case OVS_TUNNEL_KEY_ATTR_ID
: {
704 action
->encap
.id
= nl_attr_get_be64(tun_attr
);
707 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC
: {
708 action
->encap
.ipv4
.ipv4_src
= nl_attr_get_be32(tun_attr
);
711 case OVS_TUNNEL_KEY_ATTR_IPV4_DST
: {
712 action
->encap
.ipv4
.ipv4_dst
= nl_attr_get_be32(tun_attr
);
715 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC
: {
716 action
->encap
.ipv6
.ipv6_src
=
717 nl_attr_get_in6_addr(tun_attr
);
720 case OVS_TUNNEL_KEY_ATTR_IPV6_DST
: {
721 action
->encap
.ipv6
.ipv6_dst
=
722 nl_attr_get_in6_addr(tun_attr
);
725 case OVS_TUNNEL_KEY_ATTR_TP_SRC
: {
726 action
->encap
.tp_src
= nl_attr_get_be16(tun_attr
);
729 case OVS_TUNNEL_KEY_ATTR_TP_DST
: {
730 action
->encap
.tp_dst
= nl_attr_get_be16(tun_attr
);
740 test_key_and_mask(struct match
*match
)
742 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
743 const struct flow
*key
= &match
->flow
;
744 struct flow
*mask
= &match
->wc
.masks
;
746 if (mask
->pkt_mark
) {
747 VLOG_DBG_RL(&rl
, "offloading attribute pkt_mark isn't supported");
751 if (mask
->recirc_id
&& key
->recirc_id
) {
752 VLOG_DBG_RL(&rl
, "offloading attribute recirc_id isn't supported");
758 VLOG_DBG_RL(&rl
, "offloading attribute dp_hash isn't supported");
763 VLOG_DBG_RL(&rl
, "offloading attribute conj_id isn't supported");
767 if (mask
->skb_priority
) {
768 VLOG_DBG_RL(&rl
, "offloading attribute skb_priority isn't supported");
772 if (mask
->actset_output
) {
774 "offloading attribute actset_output isn't supported");
778 if (mask
->ct_state
) {
779 VLOG_DBG_RL(&rl
, "offloading attribute ct_state isn't supported");
784 VLOG_DBG_RL(&rl
, "offloading attribute ct_zone isn't supported");
789 VLOG_DBG_RL(&rl
, "offloading attribute ct_mark isn't supported");
793 if (mask
->packet_type
&& key
->packet_type
) {
794 VLOG_DBG_RL(&rl
, "offloading attribute packet_type isn't supported");
797 mask
->packet_type
= 0;
799 if (!ovs_u128_is_zero(mask
->ct_label
)) {
800 VLOG_DBG_RL(&rl
, "offloading attribute ct_label isn't supported");
804 for (int i
= 0; i
< FLOW_N_REGS
; i
++) {
807 "offloading attribute regs[%d] isn't supported", i
);
812 if (mask
->metadata
) {
813 VLOG_DBG_RL(&rl
, "offloading attribute metadata isn't supported");
818 VLOG_DBG_RL(&rl
, "offloading attribute nw_tos isn't supported");
822 for (int i
= 0; i
< FLOW_MAX_MPLS_LABELS
; i
++) {
823 if (mask
->mpls_lse
[i
]) {
824 VLOG_DBG_RL(&rl
, "offloading attribute mpls_lse isn't supported");
829 if (key
->dl_type
== htons(ETH_TYPE_IP
) &&
830 key
->nw_proto
== IPPROTO_ICMP
) {
833 "offloading attribute icmp_type isn't supported");
838 "offloading attribute icmp_code isn't supported");
841 } else if (key
->dl_type
== htons(ETH_TYPE_IP
) &&
842 key
->nw_proto
== IPPROTO_IGMP
) {
845 "offloading attribute igmp_type isn't supported");
850 "offloading attribute igmp_code isn't supported");
853 } else if (key
->dl_type
== htons(ETH_TYPE_IPV6
) &&
854 key
->nw_proto
== IPPROTO_ICMPV6
) {
857 "offloading attribute icmp_type isn't supported");
862 "offloading attribute icmp_code isn't supported");
867 if (!is_all_zeros(mask
, sizeof *mask
)) {
868 VLOG_DBG_RL(&rl
, "offloading isn't supported, unknown attribute");
876 netdev_tc_flow_put(struct netdev
*netdev
, struct match
*match
,
877 struct nlattr
*actions
, size_t actions_len
,
878 const ovs_u128
*ufid
, struct offload_info
*info
,
879 struct dpif_flow_stats
*stats OVS_UNUSED
)
881 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
882 struct tc_flower flower
;
883 const struct flow
*key
= &match
->flow
;
884 struct flow
*mask
= &match
->wc
.masks
;
885 const struct flow_tnl
*tnl
= &match
->flow
.tunnel
;
886 struct tc_action
*action
;
894 ifindex
= netdev_get_ifindex(netdev
);
896 VLOG_ERR_RL(&error_rl
, "flow_put: failed to get ifindex for %s: %s",
897 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
901 memset(&flower
, 0, sizeof flower
);
903 if (flow_tnl_dst_is_set(&key
->tunnel
)) {
905 "tunnel: id %#" PRIx64
" src " IP_FMT
906 " dst " IP_FMT
" tp_src %d tp_dst %d",
908 IP_ARGS(tnl
->ip_src
), IP_ARGS(tnl
->ip_dst
),
909 ntohs(tnl
->tp_src
), ntohs(tnl
->tp_dst
));
910 flower
.tunnel
.id
= tnl
->tun_id
;
911 flower
.tunnel
.ipv4
.ipv4_src
= tnl
->ip_src
;
912 flower
.tunnel
.ipv4
.ipv4_dst
= tnl
->ip_dst
;
913 flower
.tunnel
.ipv6
.ipv6_src
= tnl
->ipv6_src
;
914 flower
.tunnel
.ipv6
.ipv6_dst
= tnl
->ipv6_dst
;
915 flower
.tunnel
.tp_src
= tnl
->tp_src
;
916 flower
.tunnel
.tp_dst
= tnl
->tp_dst
;
917 flower
.tunnel
.tunnel
= true;
919 memset(&mask
->tunnel
, 0, sizeof mask
->tunnel
);
921 flower
.key
.eth_type
= key
->dl_type
;
922 flower
.mask
.eth_type
= mask
->dl_type
;
924 if (mask
->vlans
[0].tci
) {
925 ovs_be16 vid_mask
= mask
->vlans
[0].tci
& htons(VLAN_VID_MASK
);
926 ovs_be16 pcp_mask
= mask
->vlans
[0].tci
& htons(VLAN_PCP_MASK
);
927 ovs_be16 cfi
= mask
->vlans
[0].tci
& htons(VLAN_CFI
);
929 if (cfi
&& key
->vlans
[0].tci
& htons(VLAN_CFI
)
930 && (!vid_mask
|| vid_mask
== htons(VLAN_VID_MASK
))
931 && (!pcp_mask
|| pcp_mask
== htons(VLAN_PCP_MASK
))
932 && (vid_mask
|| pcp_mask
)) {
934 flower
.key
.vlan_id
= vlan_tci_to_vid(key
->vlans
[0].tci
);
935 VLOG_DBG_RL(&rl
, "vlan_id: %d\n", flower
.key
.vlan_id
);
938 flower
.key
.vlan_prio
= vlan_tci_to_pcp(key
->vlans
[0].tci
);
939 VLOG_DBG_RL(&rl
, "vlan_prio: %d\n", flower
.key
.vlan_prio
);
941 flower
.key
.encap_eth_type
= flower
.key
.eth_type
;
942 flower
.key
.eth_type
= htons(ETH_TYPE_VLAN
);
943 } else if (mask
->vlans
[0].tci
== htons(0xffff) &&
944 ntohs(key
->vlans
[0].tci
) == 0) {
945 /* exact && no vlan */
950 } else if (mask
->vlans
[1].tci
) {
953 memset(mask
->vlans
, 0, sizeof mask
->vlans
);
955 flower
.key
.dst_mac
= key
->dl_dst
;
956 flower
.mask
.dst_mac
= mask
->dl_dst
;
957 flower
.key
.src_mac
= key
->dl_src
;
958 flower
.mask
.src_mac
= mask
->dl_src
;
959 memset(&mask
->dl_dst
, 0, sizeof mask
->dl_dst
);
960 memset(&mask
->dl_src
, 0, sizeof mask
->dl_src
);
962 mask
->in_port
.odp_port
= 0;
964 if (is_ip_any(key
)) {
965 flower
.key
.ip_proto
= key
->nw_proto
;
966 flower
.mask
.ip_proto
= mask
->nw_proto
;
967 flower
.key
.ip_ttl
= key
->nw_ttl
;
968 flower
.mask
.ip_ttl
= mask
->nw_ttl
;
970 if (mask
->nw_frag
& FLOW_NW_FRAG_ANY
) {
971 flower
.mask
.flags
|= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
;
973 if (key
->nw_frag
& FLOW_NW_FRAG_ANY
) {
974 flower
.key
.flags
|= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
;
976 if (mask
->nw_frag
& FLOW_NW_FRAG_LATER
) {
977 flower
.mask
.flags
|= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
;
979 if (!(key
->nw_frag
& FLOW_NW_FRAG_LATER
)) {
980 flower
.key
.flags
|= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST
;
988 if (key
->nw_proto
== IPPROTO_TCP
) {
989 flower
.key
.tcp_dst
= key
->tp_dst
;
990 flower
.mask
.tcp_dst
= mask
->tp_dst
;
991 flower
.key
.tcp_src
= key
->tp_src
;
992 flower
.mask
.tcp_src
= mask
->tp_src
;
993 flower
.key
.tcp_flags
= key
->tcp_flags
;
994 flower
.mask
.tcp_flags
= mask
->tcp_flags
;
998 } else if (key
->nw_proto
== IPPROTO_UDP
) {
999 flower
.key
.udp_dst
= key
->tp_dst
;
1000 flower
.mask
.udp_dst
= mask
->tp_dst
;
1001 flower
.key
.udp_src
= key
->tp_src
;
1002 flower
.mask
.udp_src
= mask
->tp_src
;
1005 } else if (key
->nw_proto
== IPPROTO_SCTP
) {
1006 flower
.key
.sctp_dst
= key
->tp_dst
;
1007 flower
.mask
.sctp_dst
= mask
->tp_dst
;
1008 flower
.key
.sctp_src
= key
->tp_src
;
1009 flower
.mask
.sctp_src
= mask
->tp_src
;
1018 if (key
->dl_type
== htons(ETH_P_IP
)) {
1019 flower
.key
.ipv4
.ipv4_src
= key
->nw_src
;
1020 flower
.mask
.ipv4
.ipv4_src
= mask
->nw_src
;
1021 flower
.key
.ipv4
.ipv4_dst
= key
->nw_dst
;
1022 flower
.mask
.ipv4
.ipv4_dst
= mask
->nw_dst
;
1025 } else if (key
->dl_type
== htons(ETH_P_IPV6
)) {
1026 flower
.key
.ipv6
.ipv6_src
= key
->ipv6_src
;
1027 flower
.mask
.ipv6
.ipv6_src
= mask
->ipv6_src
;
1028 flower
.key
.ipv6
.ipv6_dst
= key
->ipv6_dst
;
1029 flower
.mask
.ipv6
.ipv6_dst
= mask
->ipv6_dst
;
1030 memset(&mask
->ipv6_src
, 0, sizeof mask
->ipv6_src
);
1031 memset(&mask
->ipv6_dst
, 0, sizeof mask
->ipv6_dst
);
1035 err
= test_key_and_mask(match
);
1040 NL_ATTR_FOR_EACH(nla
, left
, actions
, actions_len
) {
1041 if (flower
.action_count
>= TCA_ACT_MAX_PRIO
) {
1042 VLOG_DBG_RL(&rl
, "Can only support %d actions", flower
.action_count
);
1045 action
= &flower
.actions
[flower
.action_count
];
1046 if (nl_attr_type(nla
) == OVS_ACTION_ATTR_OUTPUT
) {
1047 odp_port_t port
= nl_attr_get_odp_port(nla
);
1048 struct netdev
*outdev
= netdev_ports_get(port
, info
->dpif_class
);
1050 action
->ifindex_out
= netdev_get_ifindex(outdev
);
1051 action
->type
= TC_ACT_OUTPUT
;
1052 flower
.action_count
++;
1053 netdev_close(outdev
);
1054 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_PUSH_VLAN
) {
1055 const struct ovs_action_push_vlan
*vlan_push
= nl_attr_get(nla
);
1057 action
->vlan
.vlan_push_id
= vlan_tci_to_vid(vlan_push
->vlan_tci
);
1058 action
->vlan
.vlan_push_prio
= vlan_tci_to_pcp(vlan_push
->vlan_tci
);
1059 action
->type
= TC_ACT_VLAN_PUSH
;
1060 flower
.action_count
++;
1061 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_POP_VLAN
) {
1062 action
->type
= TC_ACT_VLAN_POP
;
1063 flower
.action_count
++;
1064 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_SET
) {
1065 const struct nlattr
*set
= nl_attr_get(nla
);
1066 const size_t set_len
= nl_attr_get_size(nla
);
1068 err
= parse_put_flow_set_action(&flower
, action
, set
, set_len
);
1072 if (action
->type
== TC_ACT_ENCAP
) {
1073 action
->encap
.tp_dst
= info
->tp_dst_port
;
1075 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_SET_MASKED
) {
1076 const struct nlattr
*set
= nl_attr_get(nla
);
1077 const size_t set_len
= nl_attr_get_size(nla
);
1079 err
= parse_put_flow_set_masked_action(&flower
, action
, set
,
1085 VLOG_DBG_RL(&rl
, "unsupported put action type: %d",
1091 handle
= get_ufid_tc_mapping(ufid
, &prio
, NULL
);
1092 if (handle
&& prio
) {
1093 VLOG_DBG_RL(&rl
, "updating old handle: %d prio: %d", handle
, prio
);
1094 tc_del_filter(ifindex
, prio
, handle
);
1098 prio
= get_prio_for_tc_flower(&flower
);
1100 VLOG_ERR_RL(&rl
, "couldn't get tc prio: %s", ovs_strerror(ENOSPC
));
1105 flower
.act_cookie
.data
= ufid
;
1106 flower
.act_cookie
.len
= sizeof *ufid
;
1108 err
= tc_replace_flower(ifindex
, prio
, handle
, &flower
);
1110 add_ufid_tc_mapping(ufid
, flower
.prio
, flower
.handle
, netdev
, ifindex
);
1117 netdev_tc_flow_get(struct netdev
*netdev OVS_UNUSED
,
1118 struct match
*match
,
1119 struct nlattr
**actions
,
1120 const ovs_u128
*ufid
,
1121 struct dpif_flow_stats
*stats
,
1124 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
1126 struct tc_flower flower
;
1133 handle
= get_ufid_tc_mapping(ufid
, &prio
, &dev
);
1138 ifindex
= netdev_get_ifindex(dev
);
1140 VLOG_ERR_RL(&error_rl
, "flow_get: failed to get ifindex for %s: %s",
1141 netdev_get_name(dev
), ovs_strerror(-ifindex
));
1146 VLOG_DBG_RL(&rl
, "flow get (dev %s prio %d handle %d)",
1147 netdev_get_name(dev
), prio
, handle
);
1148 err
= tc_get_flower(ifindex
, prio
, handle
, &flower
);
1151 VLOG_ERR_RL(&error_rl
, "flow get failed (dev %s prio %d handle %d): %s",
1152 netdev_get_name(dev
), prio
, handle
, ovs_strerror(err
));
1156 in_port
= netdev_ifindex_to_odp_port(ifindex
);
1157 parse_tc_flower_to_match(&flower
, match
, actions
, stats
, buf
);
1159 match
->wc
.masks
.in_port
.odp_port
= u32_to_odp(UINT32_MAX
);
1160 match
->flow
.in_port
.odp_port
= in_port
;
1166 netdev_tc_flow_del(struct netdev
*netdev OVS_UNUSED
,
1167 const ovs_u128
*ufid
,
1168 struct dpif_flow_stats
*stats
)
1170 struct tc_flower flower
;
1177 handle
= get_ufid_tc_mapping(ufid
, &prio
, &dev
);
1182 ifindex
= netdev_get_ifindex(dev
);
1184 VLOG_ERR_RL(&error_rl
, "flow_del: failed to get ifindex for %s: %s",
1185 netdev_get_name(dev
), ovs_strerror(-ifindex
));
1191 memset(stats
, 0, sizeof *stats
);
1192 if (!tc_get_flower(ifindex
, prio
, handle
, &flower
)) {
1193 stats
->n_packets
= get_32aligned_u64(&flower
.stats
.n_packets
);
1194 stats
->n_bytes
= get_32aligned_u64(&flower
.stats
.n_bytes
);
1195 stats
->used
= flower
.lastused
;
1199 error
= tc_del_filter(ifindex
, prio
, handle
);
1200 del_ufid_tc_mapping(ufid
);
1208 probe_multi_mask_per_prio(int ifindex
)
1210 struct tc_flower flower
;
1213 memset(&flower
, 0, sizeof flower
);
1215 flower
.key
.eth_type
= htons(ETH_P_IP
);
1216 flower
.mask
.eth_type
= 0xfff;
1217 memset(&flower
.key
.dst_mac
, 0x11, sizeof flower
.key
.dst_mac
);
1218 memset(&flower
.mask
.dst_mac
, 0xff, sizeof flower
.mask
.dst_mac
);
1220 error
= tc_replace_flower(ifindex
, 1, 1, &flower
);
1225 memset(&flower
.key
.src_mac
, 0x11, sizeof flower
.key
.src_mac
);
1226 memset(&flower
.mask
.src_mac
, 0xff, sizeof flower
.mask
.src_mac
);
1228 error
= tc_replace_flower(ifindex
, 1, 2, &flower
);
1229 tc_del_filter(ifindex
, 1, 1);
1235 tc_del_filter(ifindex
, 1, 2);
1237 multi_mask_per_prio
= true;
1238 VLOG_INFO("probe tc: multiple masks on single tc prio is supported.");
1242 netdev_tc_init_flow_api(struct netdev
*netdev
)
1244 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
1248 ifindex
= netdev_get_ifindex(netdev
);
1250 VLOG_ERR_RL(&error_rl
, "init: failed to get ifindex for %s: %s",
1251 netdev_get_name(netdev
), ovs_strerror(-ifindex
));
1255 error
= tc_add_del_ingress_qdisc(ifindex
, true);
1257 if (error
&& error
!= EEXIST
) {
1258 VLOG_ERR("failed adding ingress qdisc required for offloading: %s",
1259 ovs_strerror(error
));
1263 VLOG_INFO("added ingress qdisc to %s", netdev_get_name(netdev
));
1265 if (ovsthread_once_start(&once
)) {
1266 probe_multi_mask_per_prio(ifindex
);
1267 ovsthread_once_done(&once
);