2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 * Copyright (c) 2019 Mellanox Technologies, Ltd.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
22 #include "dpif-netdev.h"
23 #include "netdev-offload-provider.h"
24 #include "netdev-provider.h"
25 #include "openvswitch/match.h"
26 #include "openvswitch/vlog.h"
30 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk
);
31 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(100, 5);
36 * Below API is NOT thread safe in following terms:
38 * - The caller must be sure that none of these functions will be called
39 * simultaneously. Even for different 'netdev's.
41 * - The caller must be sure that 'netdev' will not be destructed/deallocated.
43 * - The caller must be sure that 'netdev' configuration will not be changed.
44 * For example, simultaneous call of 'netdev_reconfigure()' for the same
45 * 'netdev' is forbidden.
47 * For current implementation all above restrictions could be fulfilled by
48 * taking the datapath 'port_mutex' in lib/dpif-netdev.c. */
51 * A mapping from ufid to dpdk rte_flow.
53 static struct cmap ufid_to_rte_flow
= CMAP_INITIALIZER
;
55 struct ufid_to_rte_flow_data
{
56 struct cmap_node node
;
58 struct rte_flow
*rte_flow
;
59 bool actions_offloaded
;
60 struct dpif_flow_stats stats
;
63 /* Find rte_flow with @ufid. */
64 static struct ufid_to_rte_flow_data
*
65 ufid_to_rte_flow_data_find(const ovs_u128
*ufid
)
67 size_t hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
68 struct ufid_to_rte_flow_data
*data
;
70 CMAP_FOR_EACH_WITH_HASH (data
, node
, hash
, &ufid_to_rte_flow
) {
71 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
80 ufid_to_rte_flow_associate(const ovs_u128
*ufid
,
81 struct rte_flow
*rte_flow
, bool actions_offloaded
)
83 size_t hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
84 struct ufid_to_rte_flow_data
*data
= xzalloc(sizeof *data
);
85 struct ufid_to_rte_flow_data
*data_prev
;
88 * We should not simply overwrite an existing rte flow.
89 * We should have deleted it first before re-adding it.
90 * Thus, if following assert triggers, something is wrong:
91 * the rte_flow is not destroyed.
93 data_prev
= ufid_to_rte_flow_data_find(ufid
);
95 ovs_assert(data_prev
->rte_flow
== NULL
);
99 data
->rte_flow
= rte_flow
;
100 data
->actions_offloaded
= actions_offloaded
;
102 cmap_insert(&ufid_to_rte_flow
,
103 CONST_CAST(struct cmap_node
*, &data
->node
), hash
);
107 ufid_to_rte_flow_disassociate(const ovs_u128
*ufid
)
109 size_t hash
= hash_bytes(ufid
, sizeof *ufid
, 0);
110 struct ufid_to_rte_flow_data
*data
;
112 CMAP_FOR_EACH_WITH_HASH (data
, node
, hash
, &ufid_to_rte_flow
) {
113 if (ovs_u128_equals(*ufid
, data
->ufid
)) {
114 cmap_remove(&ufid_to_rte_flow
,
115 CONST_CAST(struct cmap_node
*, &data
->node
), hash
);
116 ovsrcu_postpone(free
, data
);
121 VLOG_WARN("ufid "UUID_FMT
" is not associated with an rte flow\n",
122 UUID_ARGS((struct uuid
*) ufid
));
/*
 * To avoid individual xrealloc calls for each new element, a 'curent_max'
 * is used to keep track of current allocated number of elements. Starts
 * by 8 and doubles on each xrealloc call.
 */
struct flow_patterns {
    struct rte_flow_item *items; /* Growable array of match items. */
    int cnt;                     /* Number of items in use. */
    int current_max;             /* Allocated capacity of 'items'. */
};

struct flow_actions {
    struct rte_flow_action *actions; /* Growable array of actions. */
    int cnt;                         /* Number of actions in use. */
    int current_max;                 /* Allocated capacity of 'actions'. */
};
143 dump_flow_attr(struct ds
*s
, const struct rte_flow_attr
*attr
)
147 "ingress=%d, egress=%d, prio=%d, group=%d, transfer=%d\n",
148 attr
->ingress
, attr
->egress
, attr
->priority
, attr
->group
,
153 dump_flow_pattern(struct ds
*s
, const struct rte_flow_item
*item
)
155 if (item
->type
== RTE_FLOW_ITEM_TYPE_ETH
) {
156 const struct rte_flow_item_eth
*eth_spec
= item
->spec
;
157 const struct rte_flow_item_eth
*eth_mask
= item
->mask
;
159 ds_put_cstr(s
, "rte flow eth pattern:\n");
162 " Spec: src="ETH_ADDR_FMT
", dst="ETH_ADDR_FMT
", "
163 "type=0x%04" PRIx16
"\n",
164 ETH_ADDR_BYTES_ARGS(eth_spec
->src
.addr_bytes
),
165 ETH_ADDR_BYTES_ARGS(eth_spec
->dst
.addr_bytes
),
166 ntohs(eth_spec
->type
));
168 ds_put_cstr(s
, " Spec = null\n");
172 " Mask: src="ETH_ADDR_FMT
", dst="ETH_ADDR_FMT
", "
173 "type=0x%04"PRIx16
"\n",
174 ETH_ADDR_BYTES_ARGS(eth_mask
->src
.addr_bytes
),
175 ETH_ADDR_BYTES_ARGS(eth_mask
->dst
.addr_bytes
),
176 ntohs(eth_mask
->type
));
178 ds_put_cstr(s
, " Mask = null\n");
180 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_VLAN
) {
181 const struct rte_flow_item_vlan
*vlan_spec
= item
->spec
;
182 const struct rte_flow_item_vlan
*vlan_mask
= item
->mask
;
184 ds_put_cstr(s
, "rte flow vlan pattern:\n");
187 " Spec: inner_type=0x%"PRIx16
", tci=0x%"PRIx16
"\n",
188 ntohs(vlan_spec
->inner_type
), ntohs(vlan_spec
->tci
));
190 ds_put_cstr(s
, " Spec = null\n");
195 " Mask: inner_type=0x%"PRIx16
", tci=0x%"PRIx16
"\n",
196 ntohs(vlan_mask
->inner_type
), ntohs(vlan_mask
->tci
));
198 ds_put_cstr(s
, " Mask = null\n");
200 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_IPV4
) {
201 const struct rte_flow_item_ipv4
*ipv4_spec
= item
->spec
;
202 const struct rte_flow_item_ipv4
*ipv4_mask
= item
->mask
;
204 ds_put_cstr(s
, "rte flow ipv4 pattern:\n");
207 " Spec: tos=0x%"PRIx8
", ttl=%"PRIx8
209 ", src="IP_FMT
", dst="IP_FMT
"\n",
210 ipv4_spec
->hdr
.type_of_service
,
211 ipv4_spec
->hdr
.time_to_live
,
212 ipv4_spec
->hdr
.next_proto_id
,
213 IP_ARGS(ipv4_spec
->hdr
.src_addr
),
214 IP_ARGS(ipv4_spec
->hdr
.dst_addr
));
216 ds_put_cstr(s
, " Spec = null\n");
220 " Mask: tos=0x%"PRIx8
", ttl=%"PRIx8
222 ", src="IP_FMT
", dst="IP_FMT
"\n",
223 ipv4_mask
->hdr
.type_of_service
,
224 ipv4_mask
->hdr
.time_to_live
,
225 ipv4_mask
->hdr
.next_proto_id
,
226 IP_ARGS(ipv4_mask
->hdr
.src_addr
),
227 IP_ARGS(ipv4_mask
->hdr
.dst_addr
));
229 ds_put_cstr(s
, " Mask = null\n");
231 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_UDP
) {
232 const struct rte_flow_item_udp
*udp_spec
= item
->spec
;
233 const struct rte_flow_item_udp
*udp_mask
= item
->mask
;
235 ds_put_cstr(s
, "rte flow udp pattern:\n");
238 " Spec: src_port=%"PRIu16
", dst_port=%"PRIu16
"\n",
239 ntohs(udp_spec
->hdr
.src_port
),
240 ntohs(udp_spec
->hdr
.dst_port
));
242 ds_put_cstr(s
, " Spec = null\n");
246 " Mask: src_port=0x%"PRIx16
247 ", dst_port=0x%"PRIx16
"\n",
248 ntohs(udp_mask
->hdr
.src_port
),
249 ntohs(udp_mask
->hdr
.dst_port
));
251 ds_put_cstr(s
, " Mask = null\n");
253 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_SCTP
) {
254 const struct rte_flow_item_sctp
*sctp_spec
= item
->spec
;
255 const struct rte_flow_item_sctp
*sctp_mask
= item
->mask
;
257 ds_put_cstr(s
, "rte flow sctp pattern:\n");
260 " Spec: src_port=%"PRIu16
", dst_port=%"PRIu16
"\n",
261 ntohs(sctp_spec
->hdr
.src_port
),
262 ntohs(sctp_spec
->hdr
.dst_port
));
264 ds_put_cstr(s
, " Spec = null\n");
268 " Mask: src_port=0x%"PRIx16
269 ", dst_port=0x%"PRIx16
"\n",
270 ntohs(sctp_mask
->hdr
.src_port
),
271 ntohs(sctp_mask
->hdr
.dst_port
));
273 ds_put_cstr(s
, " Mask = null\n");
275 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_ICMP
) {
276 const struct rte_flow_item_icmp
*icmp_spec
= item
->spec
;
277 const struct rte_flow_item_icmp
*icmp_mask
= item
->mask
;
279 ds_put_cstr(s
, "rte flow icmp pattern:\n");
282 " Spec: icmp_type=%"PRIu8
", icmp_code=%"PRIu8
"\n",
283 icmp_spec
->hdr
.icmp_type
,
284 icmp_spec
->hdr
.icmp_code
);
286 ds_put_cstr(s
, " Spec = null\n");
290 " Mask: icmp_type=0x%"PRIx8
291 ", icmp_code=0x%"PRIx8
"\n",
292 icmp_spec
->hdr
.icmp_type
,
293 icmp_spec
->hdr
.icmp_code
);
295 ds_put_cstr(s
, " Mask = null\n");
297 } else if (item
->type
== RTE_FLOW_ITEM_TYPE_TCP
) {
298 const struct rte_flow_item_tcp
*tcp_spec
= item
->spec
;
299 const struct rte_flow_item_tcp
*tcp_mask
= item
->mask
;
301 ds_put_cstr(s
, "rte flow tcp pattern:\n");
304 " Spec: src_port=%"PRIu16
", dst_port=%"PRIu16
305 ", data_off=0x%"PRIx8
", tcp_flags=0x%"PRIx8
"\n",
306 ntohs(tcp_spec
->hdr
.src_port
),
307 ntohs(tcp_spec
->hdr
.dst_port
),
308 tcp_spec
->hdr
.data_off
,
309 tcp_spec
->hdr
.tcp_flags
);
311 ds_put_cstr(s
, " Spec = null\n");
315 " Mask: src_port=%"PRIx16
", dst_port=%"PRIx16
316 ", data_off=0x%"PRIx8
", tcp_flags=0x%"PRIx8
"\n",
317 ntohs(tcp_mask
->hdr
.src_port
),
318 ntohs(tcp_mask
->hdr
.dst_port
),
319 tcp_mask
->hdr
.data_off
,
320 tcp_mask
->hdr
.tcp_flags
);
322 ds_put_cstr(s
, " Mask = null\n");
325 ds_put_format(s
, "unknown rte flow pattern (%d)\n", item
->type
);
330 dump_flow_action(struct ds
*s
, const struct rte_flow_action
*actions
)
332 if (actions
->type
== RTE_FLOW_ACTION_TYPE_MARK
) {
333 const struct rte_flow_action_mark
*mark
= actions
->conf
;
335 ds_put_cstr(s
, "rte flow mark action:\n");
337 ds_put_format(s
, " Mark: id=%d\n", mark
->id
);
339 ds_put_cstr(s
, " Mark = null\n");
341 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_RSS
) {
342 const struct rte_flow_action_rss
*rss
= actions
->conf
;
344 ds_put_cstr(s
, "rte flow RSS action:\n");
346 ds_put_format(s
, " RSS: queue_num=%d\n", rss
->queue_num
);
348 ds_put_cstr(s
, " RSS = null\n");
350 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_COUNT
) {
351 const struct rte_flow_action_count
*count
= actions
->conf
;
353 ds_put_cstr(s
, "rte flow count action:\n");
355 ds_put_format(s
, " Count: shared=%d, id=%d\n", count
->shared
,
358 ds_put_cstr(s
, " Count = null\n");
360 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_PORT_ID
) {
361 const struct rte_flow_action_port_id
*port_id
= actions
->conf
;
363 ds_put_cstr(s
, "rte flow port-id action:\n");
365 ds_put_format(s
, " Port-id: original=%d, id=%d\n",
366 port_id
->original
, port_id
->id
);
368 ds_put_cstr(s
, " Port-id = null\n");
370 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_DROP
) {
371 ds_put_cstr(s
, "rte flow drop action\n");
372 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
||
373 actions
->type
== RTE_FLOW_ACTION_TYPE_SET_MAC_DST
) {
374 const struct rte_flow_action_set_mac
*set_mac
= actions
->conf
;
376 char *dirstr
= actions
->type
== RTE_FLOW_ACTION_TYPE_SET_MAC_DST
379 ds_put_format(s
, "rte flow set-mac-%s action:\n", dirstr
);
382 " Set-mac-%s: "ETH_ADDR_FMT
"\n", dirstr
,
383 ETH_ADDR_BYTES_ARGS(set_mac
->mac_addr
));
385 ds_put_format(s
, " Set-mac-%s = null\n", dirstr
);
387 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
||
388 actions
->type
== RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
) {
389 const struct rte_flow_action_set_ipv4
*set_ipv4
= actions
->conf
;
390 char *dirstr
= actions
->type
== RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
393 ds_put_format(s
, "rte flow set-ipv4-%s action:\n", dirstr
);
396 " Set-ipv4-%s: "IP_FMT
"\n", dirstr
,
397 IP_ARGS(set_ipv4
->ipv4_addr
));
399 ds_put_format(s
, " Set-ipv4-%s = null\n", dirstr
);
401 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_SET_TTL
) {
402 const struct rte_flow_action_set_ttl
*set_ttl
= actions
->conf
;
404 ds_put_cstr(s
, "rte flow set-ttl action:\n");
406 ds_put_format(s
, " Set-ttl: %d\n", set_ttl
->ttl_value
);
408 ds_put_cstr(s
, " Set-ttl = null\n");
410 } else if (actions
->type
== RTE_FLOW_ACTION_TYPE_SET_TP_SRC
||
411 actions
->type
== RTE_FLOW_ACTION_TYPE_SET_TP_DST
) {
412 const struct rte_flow_action_set_tp
*set_tp
= actions
->conf
;
413 char *dirstr
= actions
->type
== RTE_FLOW_ACTION_TYPE_SET_TP_DST
416 ds_put_format(s
, "rte flow set-tcp/udp-port-%s action:\n", dirstr
);
418 ds_put_format(s
, " Set-%s-tcp/udp-port: %"PRIu16
"\n", dirstr
,
419 ntohs(set_tp
->port
));
421 ds_put_format(s
, " Set-%s-tcp/udp-port = null\n", dirstr
);
424 ds_put_format(s
, "unknown rte flow action (%d)\n", actions
->type
);
429 dump_flow(struct ds
*s
,
430 const struct rte_flow_attr
*attr
,
431 const struct rte_flow_item
*items
,
432 const struct rte_flow_action
*actions
)
435 dump_flow_attr(s
, attr
);
437 while (items
&& items
->type
!= RTE_FLOW_ITEM_TYPE_END
) {
438 dump_flow_pattern(s
, items
++);
440 while (actions
&& actions
->type
!= RTE_FLOW_ACTION_TYPE_END
) {
441 dump_flow_action(s
, actions
++);
446 static struct rte_flow
*
447 netdev_offload_dpdk_flow_create(struct netdev
*netdev
,
448 const struct rte_flow_attr
*attr
,
449 const struct rte_flow_item
*items
,
450 const struct rte_flow_action
*actions
,
451 struct rte_flow_error
*error
)
453 struct rte_flow
*flow
;
456 flow
= netdev_dpdk_rte_flow_create(netdev
, attr
, items
, actions
, error
);
458 if (!VLOG_DROP_DBG(&rl
)) {
460 dump_flow(&s
, attr
, items
, actions
);
461 VLOG_DBG_RL(&rl
, "%s: rte_flow 0x%"PRIxPTR
" created:\n%s",
462 netdev_get_name(netdev
), (intptr_t) flow
, ds_cstr(&s
));
466 enum vlog_level level
= VLL_WARN
;
468 if (error
->type
== RTE_FLOW_ERROR_TYPE_ACTION
) {
471 VLOG_RL(&rl
, level
, "%s: rte_flow creation failed: %d (%s).",
472 netdev_get_name(netdev
), error
->type
, error
->message
);
473 if (!vlog_should_drop(&this_module
, level
, &rl
)) {
475 dump_flow(&s
, attr
, items
, actions
);
476 VLOG_RL(&rl
, level
, "Failed flow:\n%s", ds_cstr(&s
));
484 add_flow_pattern(struct flow_patterns
*patterns
, enum rte_flow_item_type type
,
485 const void *spec
, const void *mask
)
487 int cnt
= patterns
->cnt
;
490 patterns
->current_max
= 8;
491 patterns
->items
= xcalloc(patterns
->current_max
,
492 sizeof *patterns
->items
);
493 } else if (cnt
== patterns
->current_max
) {
494 patterns
->current_max
*= 2;
495 patterns
->items
= xrealloc(patterns
->items
, patterns
->current_max
*
496 sizeof *patterns
->items
);
499 patterns
->items
[cnt
].type
= type
;
500 patterns
->items
[cnt
].spec
= spec
;
501 patterns
->items
[cnt
].mask
= mask
;
502 patterns
->items
[cnt
].last
= NULL
;
507 add_flow_action(struct flow_actions
*actions
, enum rte_flow_action_type type
,
510 int cnt
= actions
->cnt
;
513 actions
->current_max
= 8;
514 actions
->actions
= xcalloc(actions
->current_max
,
515 sizeof *actions
->actions
);
516 } else if (cnt
== actions
->current_max
) {
517 actions
->current_max
*= 2;
518 actions
->actions
= xrealloc(actions
->actions
, actions
->current_max
*
519 sizeof *actions
->actions
);
522 actions
->actions
[cnt
].type
= type
;
523 actions
->actions
[cnt
].conf
= conf
;
528 free_flow_patterns(struct flow_patterns
*patterns
)
532 for (i
= 0; i
< patterns
->cnt
; i
++) {
533 if (patterns
->items
[i
].spec
) {
534 free(CONST_CAST(void *, patterns
->items
[i
].spec
));
536 if (patterns
->items
[i
].mask
) {
537 free(CONST_CAST(void *, patterns
->items
[i
].mask
));
540 free(patterns
->items
);
541 patterns
->items
= NULL
;
546 free_flow_actions(struct flow_actions
*actions
)
550 for (i
= 0; i
< actions
->cnt
; i
++) {
551 if (actions
->actions
[i
].conf
) {
552 free(CONST_CAST(void *, actions
->actions
[i
].conf
));
555 free(actions
->actions
);
556 actions
->actions
= NULL
;
561 parse_flow_match(struct flow_patterns
*patterns
,
562 const struct match
*match
)
564 uint8_t *next_proto_mask
= NULL
;
568 if (!eth_addr_is_zero(match
->wc
.masks
.dl_src
) ||
569 !eth_addr_is_zero(match
->wc
.masks
.dl_dst
)) {
570 struct rte_flow_item_eth
*spec
, *mask
;
572 spec
= xzalloc(sizeof *spec
);
573 mask
= xzalloc(sizeof *mask
);
575 memcpy(&spec
->dst
, &match
->flow
.dl_dst
, sizeof spec
->dst
);
576 memcpy(&spec
->src
, &match
->flow
.dl_src
, sizeof spec
->src
);
577 spec
->type
= match
->flow
.dl_type
;
579 memcpy(&mask
->dst
, &match
->wc
.masks
.dl_dst
, sizeof mask
->dst
);
580 memcpy(&mask
->src
, &match
->wc
.masks
.dl_src
, sizeof mask
->src
);
581 mask
->type
= match
->wc
.masks
.dl_type
;
583 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_ETH
, spec
, mask
);
586 * If user specifies a flow (like UDP flow) without L2 patterns,
587 * OVS will at least set the dl_type. Normally, it's enough to
588 * create an eth pattern just with it. Unluckily, some Intel's
589 * NIC (such as XL710) doesn't support that. Below is a workaround,
590 * which simply matches any L2 pkts.
592 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_ETH
, NULL
, NULL
);
596 if (match
->wc
.masks
.vlans
[0].tci
&& match
->flow
.vlans
[0].tci
) {
597 struct rte_flow_item_vlan
*spec
, *mask
;
599 spec
= xzalloc(sizeof *spec
);
600 mask
= xzalloc(sizeof *mask
);
602 spec
->tci
= match
->flow
.vlans
[0].tci
& ~htons(VLAN_CFI
);
603 mask
->tci
= match
->wc
.masks
.vlans
[0].tci
& ~htons(VLAN_CFI
);
605 /* Match any protocols. */
606 mask
->inner_type
= 0;
608 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_VLAN
, spec
, mask
);
612 if (match
->flow
.dl_type
== htons(ETH_TYPE_IP
)) {
613 struct rte_flow_item_ipv4
*spec
, *mask
;
615 spec
= xzalloc(sizeof *spec
);
616 mask
= xzalloc(sizeof *mask
);
618 spec
->hdr
.type_of_service
= match
->flow
.nw_tos
;
619 spec
->hdr
.time_to_live
= match
->flow
.nw_ttl
;
620 spec
->hdr
.next_proto_id
= match
->flow
.nw_proto
;
621 spec
->hdr
.src_addr
= match
->flow
.nw_src
;
622 spec
->hdr
.dst_addr
= match
->flow
.nw_dst
;
624 mask
->hdr
.type_of_service
= match
->wc
.masks
.nw_tos
;
625 mask
->hdr
.time_to_live
= match
->wc
.masks
.nw_ttl
;
626 mask
->hdr
.next_proto_id
= match
->wc
.masks
.nw_proto
;
627 mask
->hdr
.src_addr
= match
->wc
.masks
.nw_src
;
628 mask
->hdr
.dst_addr
= match
->wc
.masks
.nw_dst
;
630 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_IPV4
, spec
, mask
);
632 /* Save proto for L4 protocol setup. */
633 proto
= spec
->hdr
.next_proto_id
&
634 mask
->hdr
.next_proto_id
;
635 next_proto_mask
= &mask
->hdr
.next_proto_id
;
638 if (proto
!= IPPROTO_ICMP
&& proto
!= IPPROTO_UDP
&&
639 proto
!= IPPROTO_SCTP
&& proto
!= IPPROTO_TCP
&&
640 (match
->wc
.masks
.tp_src
||
641 match
->wc
.masks
.tp_dst
||
642 match
->wc
.masks
.tcp_flags
)) {
643 VLOG_DBG("L4 Protocol (%u) not supported", proto
);
647 if ((match
->wc
.masks
.tp_src
&& match
->wc
.masks
.tp_src
!= OVS_BE16_MAX
) ||
648 (match
->wc
.masks
.tp_dst
&& match
->wc
.masks
.tp_dst
!= OVS_BE16_MAX
)) {
652 if (proto
== IPPROTO_TCP
) {
653 struct rte_flow_item_tcp
*spec
, *mask
;
655 spec
= xzalloc(sizeof *spec
);
656 mask
= xzalloc(sizeof *mask
);
658 spec
->hdr
.src_port
= match
->flow
.tp_src
;
659 spec
->hdr
.dst_port
= match
->flow
.tp_dst
;
660 spec
->hdr
.data_off
= ntohs(match
->flow
.tcp_flags
) >> 8;
661 spec
->hdr
.tcp_flags
= ntohs(match
->flow
.tcp_flags
) & 0xff;
663 mask
->hdr
.src_port
= match
->wc
.masks
.tp_src
;
664 mask
->hdr
.dst_port
= match
->wc
.masks
.tp_dst
;
665 mask
->hdr
.data_off
= ntohs(match
->wc
.masks
.tcp_flags
) >> 8;
666 mask
->hdr
.tcp_flags
= ntohs(match
->wc
.masks
.tcp_flags
) & 0xff;
668 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_TCP
, spec
, mask
);
670 /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */
671 if (next_proto_mask
) {
672 *next_proto_mask
= 0;
674 } else if (proto
== IPPROTO_UDP
) {
675 struct rte_flow_item_udp
*spec
, *mask
;
677 spec
= xzalloc(sizeof *spec
);
678 mask
= xzalloc(sizeof *mask
);
680 spec
->hdr
.src_port
= match
->flow
.tp_src
;
681 spec
->hdr
.dst_port
= match
->flow
.tp_dst
;
683 mask
->hdr
.src_port
= match
->wc
.masks
.tp_src
;
684 mask
->hdr
.dst_port
= match
->wc
.masks
.tp_dst
;
686 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_UDP
, spec
, mask
);
688 /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */
689 if (next_proto_mask
) {
690 *next_proto_mask
= 0;
692 } else if (proto
== IPPROTO_SCTP
) {
693 struct rte_flow_item_sctp
*spec
, *mask
;
695 spec
= xzalloc(sizeof *spec
);
696 mask
= xzalloc(sizeof *mask
);
698 spec
->hdr
.src_port
= match
->flow
.tp_src
;
699 spec
->hdr
.dst_port
= match
->flow
.tp_dst
;
701 mask
->hdr
.src_port
= match
->wc
.masks
.tp_src
;
702 mask
->hdr
.dst_port
= match
->wc
.masks
.tp_dst
;
704 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_SCTP
, spec
, mask
);
706 /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */
707 if (next_proto_mask
) {
708 *next_proto_mask
= 0;
710 } else if (proto
== IPPROTO_ICMP
) {
711 struct rte_flow_item_icmp
*spec
, *mask
;
713 spec
= xzalloc(sizeof *spec
);
714 mask
= xzalloc(sizeof *mask
);
716 spec
->hdr
.icmp_type
= (uint8_t) ntohs(match
->flow
.tp_src
);
717 spec
->hdr
.icmp_code
= (uint8_t) ntohs(match
->flow
.tp_dst
);
719 mask
->hdr
.icmp_type
= (uint8_t) ntohs(match
->wc
.masks
.tp_src
);
720 mask
->hdr
.icmp_code
= (uint8_t) ntohs(match
->wc
.masks
.tp_dst
);
722 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_ICMP
, spec
, mask
);
724 /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */
725 if (next_proto_mask
) {
726 *next_proto_mask
= 0;
730 add_flow_pattern(patterns
, RTE_FLOW_ITEM_TYPE_END
, NULL
, NULL
);
736 add_flow_mark_rss_actions(struct flow_actions
*actions
,
738 const struct netdev
*netdev
)
740 struct rte_flow_action_mark
*mark
;
741 struct action_rss_data
{
742 struct rte_flow_action_rss conf
;
745 BUILD_ASSERT_DECL(offsetof(struct action_rss_data
, conf
) == 0);
748 mark
= xzalloc(sizeof *mark
);
750 mark
->id
= flow_mark
;
751 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_MARK
, mark
);
753 rss_data
= xmalloc(sizeof *rss_data
+
754 netdev_n_rxq(netdev
) * sizeof rss_data
->queue
[0]);
755 *rss_data
= (struct action_rss_data
) {
756 .conf
= (struct rte_flow_action_rss
) {
757 .func
= RTE_ETH_HASH_FUNCTION_DEFAULT
,
760 .queue_num
= netdev_n_rxq(netdev
),
761 .queue
= rss_data
->queue
,
767 /* Override queue array with default. */
768 for (i
= 0; i
< netdev_n_rxq(netdev
); i
++) {
769 rss_data
->queue
[i
] = i
;
772 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_RSS
, &rss_data
->conf
);
773 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_END
, NULL
);
776 static struct rte_flow
*
777 netdev_offload_dpdk_mark_rss(struct flow_patterns
*patterns
,
778 struct netdev
*netdev
,
781 struct flow_actions actions
= { .actions
= NULL
, .cnt
= 0 };
782 const struct rte_flow_attr flow_attr
= {
788 struct rte_flow_error error
;
789 struct rte_flow
*flow
;
791 add_flow_mark_rss_actions(&actions
, flow_mark
, netdev
);
793 flow
= netdev_offload_dpdk_flow_create(netdev
, &flow_attr
, patterns
->items
,
794 actions
.actions
, &error
);
796 free_flow_actions(&actions
);
801 add_count_action(struct flow_actions
*actions
)
803 struct rte_flow_action_count
*count
= xzalloc(sizeof *count
);
805 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_COUNT
, count
);
809 add_port_id_action(struct flow_actions
*actions
,
810 struct netdev
*outdev
)
812 struct rte_flow_action_port_id
*port_id
;
815 outdev_id
= netdev_dpdk_get_port_id(outdev
);
819 port_id
= xzalloc(sizeof *port_id
);
820 port_id
->id
= outdev_id
;
821 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_PORT_ID
, port_id
);
826 add_output_action(struct netdev
*netdev
,
827 struct flow_actions
*actions
,
828 const struct nlattr
*nla
,
829 struct offload_info
*info
)
831 struct netdev
*outdev
;
835 port
= nl_attr_get_odp_port(nla
);
836 outdev
= netdev_ports_get(port
, info
->dpif_class
);
837 if (outdev
== NULL
) {
838 VLOG_DBG_RL(&rl
, "Cannot find netdev for odp port %"PRIu32
, port
);
841 if (!netdev_flow_api_equals(netdev
, outdev
) ||
842 add_port_id_action(actions
, outdev
)) {
843 VLOG_DBG_RL(&rl
, "%s: Output to port \'%s\' cannot be offloaded.",
844 netdev_get_name(netdev
), netdev_get_name(outdev
));
847 netdev_close(outdev
);
852 add_set_flow_action__(struct flow_actions
*actions
,
853 const void *value
, void *mask
,
854 const size_t size
, const int attr
)
859 /* DPDK does not support partially masked set actions. In such
860 * case, fail the offload.
862 if (is_all_zeros(mask
, size
)) {
865 if (!is_all_ones(mask
, size
)) {
866 VLOG_DBG_RL(&rl
, "Partial mask is not supported");
871 spec
= xzalloc(size
);
872 memcpy(spec
, value
, size
);
873 add_flow_action(actions
, attr
, spec
);
875 /* Clear used mask for later checking. */
877 memset(mask
, 0, size
);
882 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac
) ==
883 MEMBER_SIZEOF(struct ovs_key_ethernet
, eth_src
));
884 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac
) ==
885 MEMBER_SIZEOF(struct ovs_key_ethernet
, eth_dst
));
886 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4
) ==
887 MEMBER_SIZEOF(struct ovs_key_ipv4
, ipv4_src
));
888 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4
) ==
889 MEMBER_SIZEOF(struct ovs_key_ipv4
, ipv4_dst
));
890 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl
) ==
891 MEMBER_SIZEOF(struct ovs_key_ipv4
, ipv4_ttl
));
892 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp
) ==
893 MEMBER_SIZEOF(struct ovs_key_tcp
, tcp_src
));
894 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp
) ==
895 MEMBER_SIZEOF(struct ovs_key_tcp
, tcp_dst
));
896 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp
) ==
897 MEMBER_SIZEOF(struct ovs_key_udp
, udp_src
));
898 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp
) ==
899 MEMBER_SIZEOF(struct ovs_key_udp
, udp_dst
));
902 parse_set_actions(struct flow_actions
*actions
,
903 const struct nlattr
*set_actions
,
904 const size_t set_actions_len
,
907 const struct nlattr
*sa
;
910 #define add_set_flow_action(field, type) \
911 if (add_set_flow_action__(actions, &key->field, \
912 mask ? CONST_CAST(void *, &mask->field) : NULL, \
913 sizeof key->field, type)) { \
917 NL_ATTR_FOR_EACH_UNSAFE (sa
, sleft
, set_actions
, set_actions_len
) {
918 if (nl_attr_type(sa
) == OVS_KEY_ATTR_ETHERNET
) {
919 const struct ovs_key_ethernet
*key
= nl_attr_get(sa
);
920 const struct ovs_key_ethernet
*mask
= masked
? key
+ 1 : NULL
;
922 add_set_flow_action(eth_src
, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
);
923 add_set_flow_action(eth_dst
, RTE_FLOW_ACTION_TYPE_SET_MAC_DST
);
925 if (mask
&& !is_all_zeros(mask
, sizeof *mask
)) {
926 VLOG_DBG_RL(&rl
, "Unsupported ETHERNET set action");
929 } else if (nl_attr_type(sa
) == OVS_KEY_ATTR_IPV4
) {
930 const struct ovs_key_ipv4
*key
= nl_attr_get(sa
);
931 const struct ovs_key_ipv4
*mask
= masked
? key
+ 1 : NULL
;
933 add_set_flow_action(ipv4_src
, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
);
934 add_set_flow_action(ipv4_dst
, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
);
935 add_set_flow_action(ipv4_ttl
, RTE_FLOW_ACTION_TYPE_SET_TTL
);
937 if (mask
&& !is_all_zeros(mask
, sizeof *mask
)) {
938 VLOG_DBG_RL(&rl
, "Unsupported IPv4 set action");
941 } else if (nl_attr_type(sa
) == OVS_KEY_ATTR_TCP
) {
942 const struct ovs_key_tcp
*key
= nl_attr_get(sa
);
943 const struct ovs_key_tcp
*mask
= masked
? key
+ 1 : NULL
;
945 add_set_flow_action(tcp_src
, RTE_FLOW_ACTION_TYPE_SET_TP_SRC
);
946 add_set_flow_action(tcp_dst
, RTE_FLOW_ACTION_TYPE_SET_TP_DST
);
948 if (mask
&& !is_all_zeros(mask
, sizeof *mask
)) {
949 VLOG_DBG_RL(&rl
, "Unsupported TCP set action");
952 } else if (nl_attr_type(sa
) == OVS_KEY_ATTR_UDP
) {
953 const struct ovs_key_udp
*key
= nl_attr_get(sa
);
954 const struct ovs_key_udp
*mask
= masked
? key
+ 1 : NULL
;
956 add_set_flow_action(udp_src
, RTE_FLOW_ACTION_TYPE_SET_TP_SRC
);
957 add_set_flow_action(udp_dst
, RTE_FLOW_ACTION_TYPE_SET_TP_DST
);
959 if (mask
&& !is_all_zeros(mask
, sizeof *mask
)) {
960 VLOG_DBG_RL(&rl
, "Unsupported UDP set action");
965 "Unsupported set action type %d", nl_attr_type(sa
));
974 parse_flow_actions(struct netdev
*netdev
,
975 struct flow_actions
*actions
,
976 struct nlattr
*nl_actions
,
977 size_t nl_actions_len
,
978 struct offload_info
*info
)
983 add_count_action(actions
);
984 NL_ATTR_FOR_EACH_UNSAFE (nla
, left
, nl_actions
, nl_actions_len
) {
985 if (nl_attr_type(nla
) == OVS_ACTION_ATTR_OUTPUT
) {
986 if (add_output_action(netdev
, actions
, nla
, info
)) {
989 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_DROP
) {
990 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_DROP
, NULL
);
991 } else if (nl_attr_type(nla
) == OVS_ACTION_ATTR_SET
||
992 nl_attr_type(nla
) == OVS_ACTION_ATTR_SET_MASKED
) {
993 const struct nlattr
*set_actions
= nl_attr_get(nla
);
994 const size_t set_actions_len
= nl_attr_get_size(nla
);
995 bool masked
= nl_attr_type(nla
) == OVS_ACTION_ATTR_SET_MASKED
;
997 if (parse_set_actions(actions
, set_actions
, set_actions_len
,
1002 VLOG_DBG_RL(&rl
, "Unsupported action type %d", nl_attr_type(nla
));
1007 if (nl_actions_len
== 0) {
1008 VLOG_DBG_RL(&rl
, "No actions provided");
1012 add_flow_action(actions
, RTE_FLOW_ACTION_TYPE_END
, NULL
);
1016 static struct rte_flow
*
1017 netdev_offload_dpdk_actions(struct netdev
*netdev
,
1018 struct flow_patterns
*patterns
,
1019 struct nlattr
*nl_actions
,
1021 struct offload_info
*info
)
1023 const struct rte_flow_attr flow_attr
= { .ingress
= 1, .transfer
= 1 };
1024 struct flow_actions actions
= { .actions
= NULL
, .cnt
= 0 };
1025 struct rte_flow
*flow
= NULL
;
1026 struct rte_flow_error error
;
1029 ret
= parse_flow_actions(netdev
, &actions
, nl_actions
, actions_len
, info
);
1033 flow
= netdev_offload_dpdk_flow_create(netdev
, &flow_attr
, patterns
->items
,
1034 actions
.actions
, &error
);
1036 free_flow_actions(&actions
);
1041 netdev_offload_dpdk_add_flow(struct netdev
*netdev
,
1042 const struct match
*match
,
1043 struct nlattr
*nl_actions
,
1045 const ovs_u128
*ufid
,
1046 struct offload_info
*info
)
1048 struct flow_patterns patterns
= { .items
= NULL
, .cnt
= 0 };
1049 bool actions_offloaded
= true;
1050 struct rte_flow
*flow
;
1053 ret
= parse_flow_match(&patterns
, match
);
1058 flow
= netdev_offload_dpdk_actions(netdev
, &patterns
, nl_actions
,
1061 /* If we failed to offload the rule actions fallback to MARK+RSS
1064 flow
= netdev_offload_dpdk_mark_rss(&patterns
, netdev
,
1066 actions_offloaded
= false;
1073 ufid_to_rte_flow_associate(ufid
, flow
, actions_offloaded
);
1074 VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT
"\n",
1075 netdev_get_name(netdev
), flow
, UUID_ARGS((struct uuid
*)ufid
));
1078 free_flow_patterns(&patterns
);
1083 * Check if any unsupported flow patterns are specified.
1086 netdev_offload_dpdk_validate_flow(const struct match
*match
)
1088 struct match match_zero_wc
;
1089 const struct flow
*masks
= &match
->wc
.masks
;
1091 /* Create a wc-zeroed version of flow. */
1092 match_init(&match_zero_wc
, &match
->flow
, &match
->wc
);
1094 if (!is_all_zeros(&match_zero_wc
.flow
.tunnel
,
1095 sizeof match_zero_wc
.flow
.tunnel
)) {
1099 if (masks
->metadata
|| masks
->skb_priority
||
1100 masks
->pkt_mark
|| masks
->dp_hash
) {
1104 /* recirc id must be zero. */
1105 if (match_zero_wc
.flow
.recirc_id
) {
1109 if (masks
->ct_state
|| masks
->ct_nw_proto
||
1110 masks
->ct_zone
|| masks
->ct_mark
||
1111 !ovs_u128_is_zero(masks
->ct_label
)) {
1115 if (masks
->conj_id
|| masks
->actset_output
) {
1119 /* Unsupported L2. */
1120 if (!is_all_zeros(masks
->mpls_lse
, sizeof masks
->mpls_lse
)) {
1124 /* Unsupported L3. */
1125 if (masks
->ipv6_label
|| masks
->ct_nw_src
|| masks
->ct_nw_dst
||
1126 !is_all_zeros(&masks
->ipv6_src
, sizeof masks
->ipv6_src
) ||
1127 !is_all_zeros(&masks
->ipv6_dst
, sizeof masks
->ipv6_dst
) ||
1128 !is_all_zeros(&masks
->ct_ipv6_src
, sizeof masks
->ct_ipv6_src
) ||
1129 !is_all_zeros(&masks
->ct_ipv6_dst
, sizeof masks
->ct_ipv6_dst
) ||
1130 !is_all_zeros(&masks
->nd_target
, sizeof masks
->nd_target
) ||
1131 !is_all_zeros(&masks
->nsh
, sizeof masks
->nsh
) ||
1132 !is_all_zeros(&masks
->arp_sha
, sizeof masks
->arp_sha
) ||
1133 !is_all_zeros(&masks
->arp_tha
, sizeof masks
->arp_tha
)) {
1137 /* If fragmented, then don't HW accelerate - for now. */
1138 if (match_zero_wc
.flow
.nw_frag
) {
1142 /* Unsupported L4. */
1143 if (masks
->igmp_group_ip4
|| masks
->ct_tp_src
|| masks
->ct_tp_dst
) {
1150 VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols");
1155 netdev_offload_dpdk_destroy_flow(struct netdev
*netdev
,
1156 const ovs_u128
*ufid
,
1157 struct rte_flow
*rte_flow
)
1159 struct rte_flow_error error
;
1160 int ret
= netdev_dpdk_rte_flow_destroy(netdev
, rte_flow
, &error
);
1163 ufid_to_rte_flow_disassociate(ufid
);
1164 VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT
"\n",
1165 netdev_get_name(netdev
), rte_flow
,
1166 UUID_ARGS((struct uuid
*)ufid
));
1168 VLOG_ERR("%s: Failed to destroy flow: %s (%u)\n",
1169 netdev_get_name(netdev
), error
.message
, error
.type
);
1176 netdev_offload_dpdk_flow_put(struct netdev
*netdev
, struct match
*match
,
1177 struct nlattr
*actions
, size_t actions_len
,
1178 const ovs_u128
*ufid
, struct offload_info
*info
,
1179 struct dpif_flow_stats
*stats
)
1181 struct ufid_to_rte_flow_data
*rte_flow_data
;
1185 * If an old rte_flow exists, it means it's a flow modification.
1186 * Here destroy the old rte flow first before adding a new one.
1188 rte_flow_data
= ufid_to_rte_flow_data_find(ufid
);
1189 if (rte_flow_data
&& rte_flow_data
->rte_flow
) {
1190 ret
= netdev_offload_dpdk_destroy_flow(netdev
, ufid
,
1191 rte_flow_data
->rte_flow
);
1197 ret
= netdev_offload_dpdk_validate_flow(match
);
1203 memset(stats
, 0, sizeof *stats
);
1205 return netdev_offload_dpdk_add_flow(netdev
, match
, actions
,
1206 actions_len
, ufid
, info
);
1210 netdev_offload_dpdk_flow_del(struct netdev
*netdev
, const ovs_u128
*ufid
,
1211 struct dpif_flow_stats
*stats
)
1213 struct ufid_to_rte_flow_data
*rte_flow_data
;
1215 rte_flow_data
= ufid_to_rte_flow_data_find(ufid
);
1216 if (!rte_flow_data
|| !rte_flow_data
->rte_flow
) {
1221 memset(stats
, 0, sizeof *stats
);
1223 return netdev_offload_dpdk_destroy_flow(netdev
, ufid
,
1224 rte_flow_data
->rte_flow
);
1228 netdev_offload_dpdk_init_flow_api(struct netdev
*netdev
)
1230 return netdev_dpdk_flow_api_supported(netdev
) ? 0 : EOPNOTSUPP
;
1234 netdev_offload_dpdk_flow_get(struct netdev
*netdev
,
1235 struct match
*match OVS_UNUSED
,
1236 struct nlattr
**actions OVS_UNUSED
,
1237 const ovs_u128
*ufid
,
1238 struct dpif_flow_stats
*stats
,
1239 struct dpif_flow_attrs
*attrs
,
1240 struct ofpbuf
*buf OVS_UNUSED
)
1242 struct rte_flow_query_count query
= { .reset
= 1 };
1243 struct ufid_to_rte_flow_data
*rte_flow_data
;
1244 struct rte_flow_error error
;
1247 rte_flow_data
= ufid_to_rte_flow_data_find(ufid
);
1248 if (!rte_flow_data
|| !rte_flow_data
->rte_flow
) {
1253 attrs
->offloaded
= true;
1254 if (!rte_flow_data
->actions_offloaded
) {
1255 attrs
->dp_layer
= "ovs";
1256 memset(stats
, 0, sizeof *stats
);
1259 attrs
->dp_layer
= "dpdk";
1260 ret
= netdev_dpdk_rte_flow_query_count(netdev
, rte_flow_data
->rte_flow
,
1263 VLOG_DBG_RL(&rl
, "%s: Failed to query ufid "UUID_FMT
" flow: %p\n",
1264 netdev_get_name(netdev
), UUID_ARGS((struct uuid
*) ufid
),
1265 rte_flow_data
->rte_flow
);
1268 rte_flow_data
->stats
.n_packets
+= (query
.hits_set
) ? query
.hits
: 0;
1269 rte_flow_data
->stats
.n_bytes
+= (query
.bytes_set
) ? query
.bytes
: 0;
1270 if (query
.hits_set
&& query
.hits
) {
1271 rte_flow_data
->stats
.used
= time_msec();
1273 memcpy(stats
, &rte_flow_data
->stats
, sizeof *stats
);
1278 const struct netdev_flow_api netdev_offload_dpdk
= {
1279 .type
= "dpdk_flow_api",
1280 .flow_put
= netdev_offload_dpdk_flow_put
,
1281 .flow_del
= netdev_offload_dpdk_flow_del
,
1282 .init_flow_api
= netdev_offload_dpdk_init_flow_api
,
1283 .flow_get
= netdev_offload_dpdk_flow_get
,