2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 * Copyright (c) 2019 Mellanox Technologies, Ltd.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
22 #include "dpif-netdev.h"
23 #include "netdev-offload-provider.h"
24 #include "netdev-provider.h"
25 #include "openvswitch/match.h"
26 #include "openvswitch/vlog.h"
30 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk
);
31 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(100, 5);
/* Thread-safety
 * =============
 *
 * The API below is NOT thread safe in the following respects:
 *
 *  - The caller must ensure that none of these functions is called
 *    simultaneously, even for different 'netdev's.
 *
 *  - The caller must ensure that 'netdev' will not be destructed/deallocated.
 *
 *  - The caller must ensure that the 'netdev' configuration will not change.
 *    For example, a simultaneous call of 'netdev_reconfigure()' for the same
 *    'netdev' is forbidden.
 *
 * In the current implementation all of the above restrictions can be
 * fulfilled by taking the datapath 'port_mutex' in lib/dpif-netdev.c. */
51 * A mapping from ufid to dpdk rte_flow.
53 static struct cmap ufid_to_rte_flow
= CMAP_INITIALIZER
;
55 struct ufid_to_rte_flow_data
{
56 struct cmap_node node
;
58 struct rte_flow
*rte_flow
;
59 bool actions_offloaded
;
60 struct dpif_flow_stats stats
;
63 /* Find rte_flow with @ufid. */
64 static struct ufid_to_rte_flow_data
*
65 ufid_to_rte_flow_data_find(const ovs_u128
*ufid
)
67 size_t hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
68 struct ufid_to_rte_flow_data
*data
;
70 CMAP_FOR_EACH_WITH_HASH (data
, node
, hash
, &ufid_to_rte_flow
) {
71 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
80 ufid_to_rte_flow_associate(const ovs_u128
*ufid
,
81 struct rte_flow
*rte_flow
, bool actions_offloaded
)
83 size_t hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
84 struct ufid_to_rte_flow_data
*data
= xzalloc(sizeof *data
);
85 struct ufid_to_rte_flow_data
*data_prev
;
88 * We should not simply overwrite an existing rte flow.
89 * We should have deleted it first before re-adding it.
90 * Thus, if following assert triggers, something is wrong:
91 * the rte_flow is not destroyed.
93 data_prev
= ufid_to_rte_flow_data_find(ufid
);
95 ovs_assert(data_prev
->rte_flow
== NULL
);
99 data
->rte_flow
= rte_flow
;
100 data
->actions_offloaded
= actions_offloaded
;
102 cmap_insert(&ufid_to_rte_flow
,
103 CONST_CAST(struct cmap_node
*, &data
->node
), hash
);
107 ufid_to_rte_flow_disassociate(const ovs_u128
*ufid
)
109 size_t hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
110 struct ufid_to_rte_flow_data
*data
;
112 CMAP_FOR_EACH_WITH_HASH (data
, node
, hash
, &ufid_to_rte_flow
) {
113 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
114 cmap_remove(&ufid_to_rte_flow
,
115 CONST_CAST(struct cmap_node
*, &data
->node
), hash
);
116 ovsrcu_postpone(free
, data
);
121 VLOG_WARN("ufid "UUID_FMT
" is not associated with an rte flow\n",
122 UUID_ARGS((struct uuid
*) ufid
));
/*
 * Growable array of rte_flow pattern items.
 *
 * To avoid an individual xrealloc call for each new element, 'current_max'
 * keeps track of the number of elements currently allocated.  It starts at 8
 * and doubles on each xrealloc call.
 */
struct flow_patterns {
    struct rte_flow_item *items;    /* Allocated array of items. */
    int cnt;                        /* Number of items in use. */
    int current_max;                /* Number of items allocated. */
};
/* Growable array of rte_flow actions, grown the same way as the pattern
 * array: 'current_max' starts at 8 and doubles when the array is full. */
struct flow_actions {
    struct rte_flow_action *actions;    /* Allocated array of actions. */
    int cnt;                            /* Number of actions in use. */
    int current_max;                    /* Number of actions allocated. */
};
143 dump_flow_attr(struct ds
*s
, const struct rte_flow_attr
*attr
)
147 "ingress=%d, egress=%d, prio=%d, group=%d, transfer=%d\n",
148 attr
->ingress
, attr
->egress
, attr
->priority
, attr
->group
,
153 dump_flow_pattern(struct ds
*s
, const struct rte_flow_item
*item
)
155 if (item
->type
== RTE_FLOW_ITEM_TYPE_ETH
) {
156 const struct rte_flow_item_eth
*eth_spec
= item
->spec
;
157 const struct rte_flow_item_eth
*eth_mask
= item
->mask
;
159 ds_put_cstr(s
, "rte flow eth pattern:\n");
162 " Spec: src="ETH_ADDR_FMT
", dst="ETH_ADDR_FMT
", "
163 "type=0x%04" PRIx16
"\n",
164 ETH_ADDR_BYTES_ARGS(eth_spec
->src
.addr_bytes
),
165 ETH_ADDR_BYTES_ARGS(eth_spec
->dst
.addr_bytes
),
166 ntohs(eth_spec
->type
));
168 ds_put_cstr(s
, " Spec = null\n");
172 " Mask: src="ETH_ADDR_FMT
", dst="ETH_ADDR_FMT
", "
173 "type=0x%04"PRIx16
"\n",
174 ETH_ADDR_BYTES_ARGS(eth_mask
->src
.addr_bytes
),
175 ETH_ADDR_BYTES_ARGS(eth_mask
->dst
.addr_bytes
),
176 ntohs(eth_mask
->type
));
178 ds_put_cstr(s
, " Mask = null\n");
180 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_VLAN
) {
181 const struct rte_flow_item_vlan
*vlan_spec
= item
->spec
;
182 const struct rte_flow_item_vlan
*vlan_mask
= item
->mask
;
184 ds_put_cstr(s
, "rte flow vlan pattern:\n");
187 " Spec: inner_type=0x%"PRIx16
", tci=0x%"PRIx16
"\n",
188 ntohs(vlan_spec
->inner_type
), ntohs(vlan_spec
->tci
));
190 ds_put_cstr(s
, " Spec = null\n");
195 " Mask: inner_type=0x%"PRIx16
", tci=0x%"PRIx16
"\n",
196 ntohs(vlan_mask
->inner_type
), ntohs(vlan_mask
->tci
));
198 ds_put_cstr(s
, " Mask = null\n");
200 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_IPV4
) {
201 const struct rte_flow_item_ipv4
*ipv4_spec
= item
->spec
;
202 const struct rte_flow_item_ipv4
*ipv4_mask
= item
->mask
;
204 ds_put_cstr(s
, "rte flow ipv4 pattern:\n");
207 " Spec: tos=0x%"PRIx8
", ttl=%"PRIx8
209 ", src="IP_FMT
", dst="IP_FMT
"\n",
210 ipv4_spec
->hdr
.type_of_service
,
211 ipv4_spec
->hdr
.time_to_live
,
212 ipv4_spec
->hdr
.next_proto_id
,
213 IP_ARGS(ipv4_spec
->hdr
.src_addr
),
214 IP_ARGS(ipv4_spec
->hdr
.dst_addr
));
216 ds_put_cstr(s
, " Spec = null\n");
220 " Mask: tos=0x%"PRIx8
", ttl=%"PRIx8
222 ", src="IP_FMT
", dst="IP_FMT
"\n",
223 ipv4_mask
->hdr
.type_of_service
,
224 ipv4_mask
->hdr
.time_to_live
,
225 ipv4_mask
->hdr
.next_proto_id
,
226 IP_ARGS(ipv4_mask
->hdr
.src_addr
),
227 IP_ARGS(ipv4_mask
->hdr
.dst_addr
));
229 ds_put_cstr(s
, " Mask = null\n");
231 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_UDP
) {
232 const struct rte_flow_item_udp
*udp_spec
= item
->spec
;
233 const struct rte_flow_item_udp
*udp_mask
= item
->mask
;
235 ds_put_cstr(s
, "rte flow udp pattern:\n");
238 " Spec: src_port=%"PRIu16
", dst_port=%"PRIu16
"\n",
239 ntohs(udp_spec
->hdr
.src_port
),
240 ntohs(udp_spec
->hdr
.dst_port
));
242 ds_put_cstr(s
, " Spec = null\n");
246 " Mask: src_port=0x%"PRIx16
247 ", dst_port=0x%"PRIx16
"\n",
248 ntohs(udp_mask
->hdr
.src_port
),
249 ntohs(udp_mask
->hdr
.dst_port
));
251 ds_put_cstr(s
, " Mask = null\n");
253 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_SCTP
) {
254 const struct rte_flow_item_sctp
*sctp_spec
= item
->spec
;
255 const struct rte_flow_item_sctp
*sctp_mask
= item
->mask
;
257 ds_put_cstr(s
, "rte flow sctp pattern:\n");
260 " Spec: src_port=%"PRIu16
", dst_port=%"PRIu16
"\n",
261 ntohs(sctp_spec
->hdr
.src_port
),
262 ntohs(sctp_spec
->hdr
.dst_port
));
264 ds_put_cstr(s
, " Spec = null\n");
268 " Mask: src_port=0x%"PRIx16
269 ", dst_port=0x%"PRIx16
"\n",
270 ntohs(sctp_mask
->hdr
.src_port
),
271 ntohs(sctp_mask
->hdr
.dst_port
));
273 ds_put_cstr(s
, " Mask = null\n");
275 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_ICMP
) {
276 const struct rte_flow_item_icmp
*icmp_spec
= item
->spec
;
277 const struct rte_flow_item_icmp
*icmp_mask
= item
->mask
;
279 ds_put_cstr(s
, "rte flow icmp pattern:\n");
282 " Spec: icmp_type=%"PRIu8
", icmp_code=%"PRIu8
"\n",
283 icmp_spec
->hdr
.icmp_type
,
284 icmp_spec
->hdr
.icmp_code
);
286 ds_put_cstr(s
, " Spec = null\n");
290 " Mask: icmp_type=0x%"PRIx8
291 ", icmp_code=0x%"PRIx8
"\n",
292 icmp_spec
->hdr
.icmp_type
,
293 icmp_spec
->hdr
.icmp_code
);
295 ds_put_cstr(s
, " Mask = null\n");
297 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_TCP
) {
298 const struct rte_flow_item_tcp
*tcp_spec
= item
->spec
;
299 const struct rte_flow_item_tcp
*tcp_mask
= item
->mask
;
301 ds_put_cstr(s
, "rte flow tcp pattern:\n");
304 " Spec: src_port=%"PRIu16
", dst_port=%"PRIu16
305 ", data_off=0x%"PRIx8
", tcp_flags=0x%"PRIx8
"\n",
306 ntohs(tcp_spec
->hdr
.src_port
),
307 ntohs(tcp_spec
->hdr
.dst_port
),
308 tcp_spec
->hdr
.data_off
,
309 tcp_spec
->hdr
.tcp_flags
);
311 ds_put_cstr(s
, " Spec = null\n");
315 " Mask: src_port=%"PRIx16
", dst_port=%"PRIx16
316 ", data_off=0x%"PRIx8
", tcp_flags=0x%"PRIx8
"\n",
317 ntohs(tcp_mask
->hdr
.src_port
),
318 ntohs(tcp_mask
->hdr
.dst_port
),
319 tcp_mask
->hdr
.data_off
,
320 tcp_mask
->hdr
.tcp_flags
);
322 ds_put_cstr(s
, " Mask = null\n");
325 ds_put_format(s
, "unknown rte flow pattern (%d)\n", item
->type
);
330 dump_flow_action(struct ds
*s
, const struct rte_flow_action
*actions
)
332 if (actions
->type
== RTE_FLOW_ACTION_TYPE_MARK
) {
333 const struct rte_flow_action_mark
*mark
= actions
->conf
;
335 ds_put_cstr(s
, "rte flow mark action:\n");
337 ds_put_format(s
, " Mark: id=%d\n", mark
->id
);
339 ds_put_cstr(s
, " Mark = null\n");
341 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_RSS
) {
342 const struct rte_flow_action_rss
*rss
= actions
->conf
;
344 ds_put_cstr(s
, "rte flow RSS action:\n");
346 ds_put_format(s
, " RSS: queue_num=%d\n", rss
->queue_num
);
348 ds_put_cstr(s
, " RSS = null\n");
350 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_COUNT
) {
351 const struct rte_flow_action_count
*count
= actions
->conf
;
353 ds_put_cstr(s
, "rte flow count action:\n");
355 ds_put_format(s
, " Count: shared=%d, id=%d\n", count
->shared
,
358 ds_put_cstr(s
, " Count = null\n");
360 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_PORT_ID
) {
361 const struct rte_flow_action_port_id
*port_id
= actions
->conf
;
363 ds_put_cstr(s
, "rte flow port-id action:\n");
365 ds_put_format(s
, " Port-id: original=%d, id=%d\n",
366 port_id
->original
, port_id
->id
);
368 ds_put_cstr(s
, " Port-id = null\n");
371 ds_put_format(s
, "unknown rte flow action (%d)\n", actions
->type
);
376 dump_flow(struct ds
*s
,
377 const struct rte_flow_attr
*attr
,
378 const struct rte_flow_item
*items
,
379 const struct rte_flow_action
*actions
)
382 dump_flow_attr(s
, attr
);
384 while (items
&& items
->type
!= RTE_FLOW_ITEM_TYPE_END
) {
385 dump_flow_pattern(s
, items
++);
387 while (actions
&& actions
->type
!= RTE_FLOW_ACTION_TYPE_END
) {
388 dump_flow_action(s
, actions
++);
393 static struct rte_flow
*
394 netdev_offload_dpdk_flow_create(struct netdev
*netdev
,
395 const struct rte_flow_attr
*attr
,
396 const struct rte_flow_item
*items
,
397 const struct rte_flow_action
*actions
,
398 struct rte_flow_error
*error
)
400 struct rte_flow
*flow
;
403 flow
= netdev_dpdk_rte_flow_create(netdev
, attr
, items
, actions
, error
);
405 if (!VLOG_DROP_DBG(&rl
)) {
407 dump_flow(&s
, attr
, items
, actions
);
408 VLOG_DBG_RL(&rl
, "%s: rte_flow 0x%"PRIxPTR
" created:\n%s",
409 netdev_get_name(netdev
), (intptr_t) flow
, ds_cstr(&s
));
413 enum vlog_level level
= VLL_WARN
;
415 if (error
->type
== RTE_FLOW_ERROR_TYPE_ACTION
) {
418 VLOG_RL(&rl
, level
, "%s: rte_flow creation failed: %d (%s).",
419 netdev_get_name(netdev
), error
->type
, error
->message
);
420 if (!vlog_should_drop(&this_module
, level
, &rl
)) {
422 dump_flow(&s
, attr
, items
, actions
);
423 VLOG_RL(&rl
, level
, "Failed flow:\n%s", ds_cstr(&s
));
431 add_flow_pattern(struct flow_patterns
*patterns
, enum rte_flow_item_type type
,
432 const void *spec
, const void *mask
)
434 int cnt
= patterns
->cnt
;
437 patterns
->current_max
= 8;
438 patterns
->items
= xcalloc(patterns
->current_max
,
439 sizeof *patterns
->items
);
440 } else if (cnt
== patterns
->current_max
) {
441 patterns
->current_max
*= 2;
442 patterns
->items
= xrealloc(patterns
->items
, patterns
->current_max
*
443 sizeof *patterns
->items
);
446 patterns
->items
[cnt
].type
= type
;
447 patterns
->items
[cnt
].spec
= spec
;
448 patterns
->items
[cnt
].mask
= mask
;
449 patterns
->items
[cnt
].last
= NULL
;
454 add_flow_action(struct flow_actions
*actions
, enum rte_flow_action_type type
,
457 int cnt
= actions
->cnt
;
460 actions
->current_max
= 8;
461 actions
->actions
= xcalloc(actions
->current_max
,
462 sizeof *actions
->actions
);
463 } else if (cnt
== actions
->current_max
) {
464 actions
->current_max
*= 2;
465 actions
->actions
= xrealloc(actions
->actions
, actions
->current_max
*
466 sizeof *actions
->actions
);
469 actions
->actions
[cnt
].type
= type
;
470 actions
->actions
[cnt
].conf
= conf
;
475 free_flow_patterns(struct flow_patterns
*patterns
)
479 for (i
= 0; i
< patterns
->cnt
; i
++) {
480 if (patterns
->items
[i
].spec
) {
481 free(CONST_CAST(void *, patterns
->items
[i
].spec
));
483 if (patterns
->items
[i
].mask
) {
484 free(CONST_CAST(void *, patterns
->items
[i
].mask
));
487 free(patterns
->items
);
488 patterns
->items
= NULL
;
493 free_flow_actions(struct flow_actions
*actions
)
497 for (i
= 0; i
< actions
->cnt
; i
++) {
498 if (actions
->actions
[i
].conf
) {
499 free(CONST_CAST(void *, actions
->actions
[i
].conf
));
502 free(actions
->actions
);
503 actions
->actions
= NULL
;
508 parse_flow_match(struct flow_patterns
*patterns
,
509 const struct match
*match
)
511 uint8_t *next_proto_mask
= NULL
;
515 if (!eth_addr_is_zero(match
->wc
.masks
.dl_src
) ||
516 !eth_addr_is_zero(match
->wc
.masks
.dl_dst
)) {
517 struct rte_flow_item_eth
*spec
, *mask
;
519 spec
= xzalloc(sizeof *spec
);
520 mask
= xzalloc(sizeof *mask
);
522 memcpy(&spec
->dst
, &match
->flow
.dl_dst
, sizeof spec
->dst
);
523 memcpy(&spec
->src
, &match
->flow
.dl_src
, sizeof spec
->src
);
524 spec
->type
= match
->flow
.dl_type
;
526 memcpy(&mask
->dst
, &match
->wc
.masks
.dl_dst
, sizeof mask
->dst
);
527 memcpy(&mask
->src
, &match
->wc
.masks
.dl_src
, sizeof mask
->src
);
528 mask
->type
= match
->wc
.masks
.dl_type
;
530 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_ETH
, spec
, mask
);
533 * If user specifies a flow (like UDP flow) without L2 patterns,
534 * OVS will at least set the dl_type. Normally, it's enough to
535 * create an eth pattern just with it. Unluckily, some Intel's
536 * NIC (such as XL710) doesn't support that. Below is a workaround,
537 * which simply matches any L2 pkts.
539 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_ETH
, NULL
, NULL
);
543 if (match
->wc
.masks
.vlans
[0].tci
&& match
->flow
.vlans
[0].tci
) {
544 struct rte_flow_item_vlan
*spec
, *mask
;
546 spec
= xzalloc(sizeof *spec
);
547 mask
= xzalloc(sizeof *mask
);
549 spec
->tci
= match
->flow
.vlans
[0].tci
& ~htons(VLAN_CFI
);
550 mask
->tci
= match
->wc
.masks
.vlans
[0].tci
& ~htons(VLAN_CFI
);
552 /* Match any protocols. */
553 mask
->inner_type
= 0;
555 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_VLAN
, spec
, mask
);
559 if (match
->flow
.dl_type
== htons(ETH_TYPE_IP
)) {
560 struct rte_flow_item_ipv4
*spec
, *mask
;
562 spec
= xzalloc(sizeof *spec
);
563 mask
= xzalloc(sizeof *mask
);
565 spec
->hdr
.type_of_service
= match
->flow
.nw_tos
;
566 spec
->hdr
.time_to_live
= match
->flow
.nw_ttl
;
567 spec
->hdr
.next_proto_id
= match
->flow
.nw_proto
;
568 spec
->hdr
.src_addr
= match
->flow
.nw_src
;
569 spec
->hdr
.dst_addr
= match
->flow
.nw_dst
;
571 mask
->hdr
.type_of_service
= match
->wc
.masks
.nw_tos
;
572 mask
->hdr
.time_to_live
= match
->wc
.masks
.nw_ttl
;
573 mask
->hdr
.next_proto_id
= match
->wc
.masks
.nw_proto
;
574 mask
->hdr
.src_addr
= match
->wc
.masks
.nw_src
;
575 mask
->hdr
.dst_addr
= match
->wc
.masks
.nw_dst
;
577 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_IPV4
, spec
, mask
);
579 /* Save proto for L4 protocol setup. */
580 proto
= spec
->hdr
.next_proto_id
&
581 mask
->hdr
.next_proto_id
;
582 next_proto_mask
= &mask
->hdr
.next_proto_id
;
585 if (proto
!= IPPROTO_ICMP
&& proto
!= IPPROTO_UDP
&&
586 proto
!= IPPROTO_SCTP
&& proto
!= IPPROTO_TCP
&&
587 (match
->wc
.masks
.tp_src
||
588 match
->wc
.masks
.tp_dst
||
589 match
->wc
.masks
.tcp_flags
)) {
590 VLOG_DBG("L4 Protocol (%u) not supported", proto
);
594 if ((match
->wc
.masks
.tp_src
&& match
->wc
.masks
.tp_src
!= OVS_BE16_MAX
) ||
595 (match
->wc
.masks
.tp_dst
&& match
->wc
.masks
.tp_dst
!= OVS_BE16_MAX
)) {
599 if (proto
== IPPROTO_TCP
) {
600 struct rte_flow_item_tcp
*spec
, *mask
;
602 spec
= xzalloc(sizeof *spec
);
603 mask
= xzalloc(sizeof *mask
);
605 spec
->hdr
.src_port
= match
->flow
.tp_src
;
606 spec
->hdr
.dst_port
= match
->flow
.tp_dst
;
607 spec
->hdr
.data_off
= ntohs(match
->flow
.tcp_flags
) >> 8;
608 spec
->hdr
.tcp_flags
= ntohs(match
->flow
.tcp_flags
) & 0xff;
610 mask
->hdr
.src_port
= match
->wc
.masks
.tp_src
;
611 mask
->hdr
.dst_port
= match
->wc
.masks
.tp_dst
;
612 mask
->hdr
.data_off
= ntohs(match
->wc
.masks
.tcp_flags
) >> 8;
613 mask
->hdr
.tcp_flags
= ntohs(match
->wc
.masks
.tcp_flags
) & 0xff;
615 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_TCP
, spec
, mask
);
617 /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */
618 if (next_proto_mask
) {
619 *next_proto_mask
= 0;
621 } else if (proto
== IPPROTO_UDP
) {
622 struct rte_flow_item_udp
*spec
, *mask
;
624 spec
= xzalloc(sizeof *spec
);
625 mask
= xzalloc(sizeof *mask
);
627 spec
->hdr
.src_port
= match
->flow
.tp_src
;
628 spec
->hdr
.dst_port
= match
->flow
.tp_dst
;
630 mask
->hdr
.src_port
= match
->wc
.masks
.tp_src
;
631 mask
->hdr
.dst_port
= match
->wc
.masks
.tp_dst
;
633 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_UDP
, spec
, mask
);
635 /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */
636 if (next_proto_mask
) {
637 *next_proto_mask
= 0;
639 } else if (proto
== IPPROTO_SCTP
) {
640 struct rte_flow_item_sctp
*spec
, *mask
;
642 spec
= xzalloc(sizeof *spec
);
643 mask
= xzalloc(sizeof *mask
);
645 spec
->hdr
.src_port
= match
->flow
.tp_src
;
646 spec
->hdr
.dst_port
= match
->flow
.tp_dst
;
648 mask
->hdr
.src_port
= match
->wc
.masks
.tp_src
;
649 mask
->hdr
.dst_port
= match
->wc
.masks
.tp_dst
;
651 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_SCTP
, spec
, mask
);
653 /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */
654 if (next_proto_mask
) {
655 *next_proto_mask
= 0;
657 } else if (proto
== IPPROTO_ICMP
) {
658 struct rte_flow_item_icmp
*spec
, *mask
;
660 spec
= xzalloc(sizeof *spec
);
661 mask
= xzalloc(sizeof *mask
);
663 spec
->hdr
.icmp_type
= (uint8_t) ntohs(match
->flow
.tp_src
);
664 spec
->hdr
.icmp_code
= (uint8_t) ntohs(match
->flow
.tp_dst
);
666 mask
->hdr
.icmp_type
= (uint8_t) ntohs(match
->wc
.masks
.tp_src
);
667 mask
->hdr
.icmp_code
= (uint8_t) ntohs(match
->wc
.masks
.tp_dst
);
669 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_ICMP
, spec
, mask
);
671 /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */
672 if (next_proto_mask
) {
673 *next_proto_mask
= 0;
677 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_END
, NULL
, NULL
);
683 add_flow_mark_rss_actions(struct flow_actions
*actions
,
685 const struct netdev
*netdev
)
687 struct rte_flow_action_mark
*mark
;
688 struct action_rss_data
{
689 struct rte_flow_action_rss conf
;
692 BUILD_ASSERT_DECL(offsetof(struct action_rss_data
, conf
) == 0);
695 mark
= xzalloc(sizeof *mark
);
697 mark
->id
= flow_mark
;
698 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_MARK
, mark
);
700 rss_data
= xmalloc(sizeof *rss_data
+
701 netdev_n_rxq(netdev
) * sizeof rss_data
->queue
[0]);
702 *rss_data
= (struct action_rss_data
) {
703 .conf
= (struct rte_flow_action_rss
) {
704 .func
= RTE_ETH_HASH_FUNCTION_DEFAULT
,
707 .queue_num
= netdev_n_rxq(netdev
),
708 .queue
= rss_data
->queue
,
714 /* Override queue array with default. */
715 for (i
= 0; i
< netdev_n_rxq(netdev
); i
++) {
716 rss_data
->queue
[i
] = i
;
719 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_RSS
, &rss_data
->conf
);
720 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_END
, NULL
);
723 static struct rte_flow
*
724 netdev_offload_dpdk_mark_rss(struct flow_patterns
*patterns
,
725 struct netdev
*netdev
,
728 struct flow_actions actions
= { .actions
= NULL
, .cnt
= 0 };
729 const struct rte_flow_attr flow_attr
= {
735 struct rte_flow_error error
;
736 struct rte_flow
*flow
;
738 add_flow_mark_rss_actions(&actions
, flow_mark
, netdev
);
740 flow
= netdev_offload_dpdk_flow_create(netdev
, &flow_attr
, patterns
->items
,
741 actions
.actions
, &error
);
743 free_flow_actions(&actions
);
748 add_count_action(struct flow_actions
*actions
)
750 struct rte_flow_action_count
*count
= xzalloc(sizeof *count
);
752 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_COUNT
, count
);
756 add_port_id_action(struct flow_actions
*actions
,
757 struct netdev
*outdev
)
759 struct rte_flow_action_port_id
*port_id
;
762 outdev_id
= netdev_dpdk_get_port_id(outdev
);
766 port_id
= xzalloc(sizeof *port_id
);
767 port_id
->id
= outdev_id
;
768 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_PORT_ID
, port_id
);
773 add_output_action(struct netdev
*netdev
,
774 struct flow_actions
*actions
,
775 const struct nlattr
*nla
,
776 struct offload_info
*info
)
778 struct netdev
*outdev
;
782 port
= nl_attr_get_odp_port(nla
);
783 outdev
= netdev_ports_get(port
, info
->dpif_class
);
784 if (outdev
== NULL
) {
785 VLOG_DBG_RL(&rl
, "Cannot find netdev for odp port %"PRIu32
, port
);
788 if (!netdev_flow_api_equals(netdev
, outdev
) ||
789 add_port_id_action(actions
, outdev
)) {
790 VLOG_DBG_RL(&rl
, "%s: Output to port \'%s\' cannot be offloaded.",
791 netdev_get_name(netdev
), netdev_get_name(outdev
));
794 netdev_close(outdev
);
799 parse_flow_actions(struct netdev
*netdev
,
800 struct flow_actions
*actions
,
801 struct nlattr
*nl_actions
,
802 size_t nl_actions_len
,
803 struct offload_info
*info
)
808 add_count_action(actions
);
809 NL_ATTR_FOR_EACH_UNSAFE (nla
, left
, nl_actions
, nl_actions_len
) {
810 if (nl_attr_type(nla
) == OVS_ACTION_ATTR_OUTPUT
) {
811 if (add_output_action(netdev
, actions
, nla
, info
)) {
815 VLOG_DBG_RL(&rl
, "Unsupported action type %d", nl_attr_type(nla
));
820 if (nl_actions_len
== 0) {
821 VLOG_DBG_RL(&rl
, "No actions provided");
825 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_END
, NULL
);
829 static struct rte_flow
*
830 netdev_offload_dpdk_actions(struct netdev
*netdev
,
831 struct flow_patterns
*patterns
,
832 struct nlattr
*nl_actions
,
834 struct offload_info
*info
)
836 const struct rte_flow_attr flow_attr
= { .ingress
= 1, .transfer
= 1 };
837 struct flow_actions actions
= { .actions
= NULL
, .cnt
= 0 };
838 struct rte_flow
*flow
= NULL
;
839 struct rte_flow_error error
;
842 ret
= parse_flow_actions(netdev
, &actions
, nl_actions
, actions_len
, info
);
846 flow
= netdev_offload_dpdk_flow_create(netdev
, &flow_attr
, patterns
->items
,
847 actions
.actions
, &error
);
849 free_flow_actions(&actions
);
854 netdev_offload_dpdk_add_flow(struct netdev
*netdev
,
855 const struct match
*match
,
856 struct nlattr
*nl_actions
,
858 const ovs_u128
*ufid
,
859 struct offload_info
*info
)
861 struct flow_patterns patterns
= { .items
= NULL
, .cnt
= 0 };
862 bool actions_offloaded
= true;
863 struct rte_flow
*flow
;
866 ret
= parse_flow_match(&patterns
, match
);
871 flow
= netdev_offload_dpdk_actions(netdev
, &patterns
, nl_actions
,
874 /* If we failed to offload the rule actions fallback to MARK+RSS
877 flow
= netdev_offload_dpdk_mark_rss(&patterns
, netdev
,
879 actions_offloaded
= false;
886 ufid_to_rte_flow_associate(ufid
, flow
, actions_offloaded
);
887 VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT
"\n",
888 netdev_get_name(netdev
), flow
, UUID_ARGS((struct uuid
*)ufid
));
891 free_flow_patterns(&patterns
);
896 * Check if any unsupported flow patterns are specified.
899 netdev_offload_dpdk_validate_flow(const struct match
*match
)
901 struct match match_zero_wc
;
902 const struct flow
*masks
= &match
->wc
.masks
;
904 /* Create a wc-zeroed version of flow. */
905 match_init(&match_zero_wc
, &match
->flow
, &match
->wc
);
907 if (!is_all_zeros(&match_zero_wc
.flow
.tunnel
,
908 sizeof match_zero_wc
.flow
.tunnel
)) {
912 if (masks
->metadata
|| masks
->skb_priority
||
913 masks
->pkt_mark
|| masks
->dp_hash
) {
917 /* recirc id must be zero. */
918 if (match_zero_wc
.flow
.recirc_id
) {
922 if (masks
->ct_state
|| masks
->ct_nw_proto
||
923 masks
->ct_zone
|| masks
->ct_mark
||
924 !ovs_u128_is_zero(masks
->ct_label
)) {
928 if (masks
->conj_id
|| masks
->actset_output
) {
932 /* Unsupported L2. */
933 if (!is_all_zeros(masks
->mpls_lse
, sizeof masks
->mpls_lse
)) {
937 /* Unsupported L3. */
938 if (masks
->ipv6_label
|| masks
->ct_nw_src
|| masks
->ct_nw_dst
||
939 !is_all_zeros(&masks
->ipv6_src
, sizeof masks
->ipv6_src
) ||
940 !is_all_zeros(&masks
->ipv6_dst
, sizeof masks
->ipv6_dst
) ||
941 !is_all_zeros(&masks
->ct_ipv6_src
, sizeof masks
->ct_ipv6_src
) ||
942 !is_all_zeros(&masks
->ct_ipv6_dst
, sizeof masks
->ct_ipv6_dst
) ||
943 !is_all_zeros(&masks
->nd_target
, sizeof masks
->nd_target
) ||
944 !is_all_zeros(&masks
->nsh
, sizeof masks
->nsh
) ||
945 !is_all_zeros(&masks
->arp_sha
, sizeof masks
->arp_sha
) ||
946 !is_all_zeros(&masks
->arp_tha
, sizeof masks
->arp_tha
)) {
950 /* If fragmented, then don't HW accelerate - for now. */
951 if (match_zero_wc
.flow
.nw_frag
) {
955 /* Unsupported L4. */
956 if (masks
->igmp_group_ip4
|| masks
->ct_tp_src
|| masks
->ct_tp_dst
) {
963 VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols");
968 netdev_offload_dpdk_destroy_flow(struct netdev
*netdev
,
969 const ovs_u128
*ufid
,
970 struct rte_flow
*rte_flow
)
972 struct rte_flow_error error
;
973 int ret
= netdev_dpdk_rte_flow_destroy(netdev
, rte_flow
, &error
);
976 ufid_to_rte_flow_disassociate(ufid
);
977 VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT
"\n",
978 netdev_get_name(netdev
), rte_flow
,
979 UUID_ARGS((struct uuid
*)ufid
));
981 VLOG_ERR("%s: Failed to destroy flow: %s (%u)\n",
982 netdev_get_name(netdev
), error
.message
, error
.type
);
989 netdev_offload_dpdk_flow_put(struct netdev
*netdev
, struct match
*match
,
990 struct nlattr
*actions
, size_t actions_len
,
991 const ovs_u128
*ufid
, struct offload_info
*info
,
992 struct dpif_flow_stats
*stats
)
994 struct ufid_to_rte_flow_data
*rte_flow_data
;
998 * If an old rte_flow exists, it means it's a flow modification.
999 * Here destroy the old rte flow first before adding a new one.
1001 rte_flow_data
= ufid_to_rte_flow_data_find(ufid
);
1002 if (rte_flow_data
&& rte_flow_data
->rte_flow
) {
1003 ret
= netdev_offload_dpdk_destroy_flow(netdev
, ufid
,
1004 rte_flow_data
->rte_flow
);
1010 ret
= netdev_offload_dpdk_validate_flow(match
);
1016 memset(stats
, 0, sizeof *stats
);
1018 return netdev_offload_dpdk_add_flow(netdev
, match
, actions
,
1019 actions_len
, ufid
, info
);
1023 netdev_offload_dpdk_flow_del(struct netdev
*netdev
, const ovs_u128
*ufid
,
1024 struct dpif_flow_stats
*stats
)
1026 struct ufid_to_rte_flow_data
*rte_flow_data
;
1028 rte_flow_data
= ufid_to_rte_flow_data_find(ufid
);
1029 if (!rte_flow_data
|| !rte_flow_data
->rte_flow
) {
1034 memset(stats
, 0, sizeof *stats
);
1036 return netdev_offload_dpdk_destroy_flow(netdev
, ufid
,
1037 rte_flow_data
->rte_flow
);
/* Returns 0 if netdev_dpdk_flow_api_supported() accepts 'netdev', otherwise
 * EOPNOTSUPP. */
static int
netdev_offload_dpdk_init_flow_api(struct netdev *netdev)
{
    if (netdev_dpdk_flow_api_supported(netdev)) {
        return 0;
    }
    return EOPNOTSUPP;
}
1047 netdev_offload_dpdk_flow_get(struct netdev
*netdev
,
1048 struct match
*match OVS_UNUSED
,
1049 struct nlattr
**actions OVS_UNUSED
,
1050 const ovs_u128
*ufid
,
1051 struct dpif_flow_stats
*stats
,
1052 struct dpif_flow_attrs
*attrs
,
1053 struct ofpbuf
*buf OVS_UNUSED
)
1055 struct rte_flow_query_count query
= { .reset
= 1 };
1056 struct ufid_to_rte_flow_data
*rte_flow_data
;
1057 struct rte_flow_error error
;
1060 rte_flow_data
= ufid_to_rte_flow_data_find(ufid
);
1061 if (!rte_flow_data
|| !rte_flow_data
->rte_flow
) {
1066 attrs
->offloaded
= true;
1067 if (!rte_flow_data
->actions_offloaded
) {
1068 attrs
->dp_layer
= "ovs";
1069 memset(stats
, 0, sizeof *stats
);
1072 attrs
->dp_layer
= "dpdk";
1073 ret
= netdev_dpdk_rte_flow_query_count(netdev
, rte_flow_data
->rte_flow
,
1076 VLOG_DBG_RL(&rl
, "%s: Failed to query ufid "UUID_FMT
" flow: %p\n",
1077 netdev_get_name(netdev
), UUID_ARGS((struct uuid
*) ufid
),
1078 rte_flow_data
->rte_flow
);
1081 rte_flow_data
->stats
.n_packets
+= (query
.hits_set
) ? query
.hits
: 0;
1082 rte_flow_data
->stats
.n_bytes
+= (query
.bytes_set
) ? query
.bytes
: 0;
1083 if (query
.hits_set
&& query
.hits
) {
1084 rte_flow_data
->stats
.used
= time_msec();
1086 memcpy(stats
, &rte_flow_data
->stats
, sizeof *stats
);
1091 const struct netdev_flow_api netdev_offload_dpdk
= {
1092 .type
= "dpdk_flow_api",
1093 .flow_put
= netdev_offload_dpdk_flow_put
,
1094 .flow_del
= netdev_offload_dpdk_flow_del
,
1095 .init_flow_api
= netdev_offload_dpdk_init_flow_api
,
1096 .flow_get
= netdev_offload_dpdk_flow_get
,