/*
 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
 * Copyright (c) 2019 Mellanox Technologies, Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <config.h>

#include <sys/types.h>
#include <netinet/ip6.h>
#include <rte_flow.h>

#include "cmap.h"
#include "dpif-netdev.h"
#include "netdev-offload-provider.h"
#include "netdev-provider.h"
#include "openvswitch/match.h"
#include "openvswitch/vlog.h"
#include "packets.h"
#include "uuid.h"
VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
/* Thread-safety
 * =============
 *
 * The API below is NOT thread safe in the following terms:
 *
 *  - The caller must be sure that none of these functions will be called
 *    simultaneously.  Even for different 'netdev's.
 *
 *  - The caller must be sure that 'netdev' will not be destructed/deallocated.
 *
 *  - The caller must be sure that 'netdev' configuration will not be changed.
 *    For example, simultaneous call of 'netdev_reconfigure()' for the same
 *    'netdev' is forbidden.
 *
 * For the current implementation, all of the above restrictions can be
 * fulfilled by taking the datapath 'port_mutex' in lib/dpif-netdev.c.  */
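/* A note on the data structures below: even though the callers serialize
 * writers as described above, the 'ufid_to_rte_flow' cmap is safe for
 * concurrent RCU readers, which is why removal defers freeing with
 * ovsrcu_postpone() instead of calling free() directly. */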
/*
 * A mapping from ufid to dpdk rte_flow.
 */
static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;

struct ufid_to_rte_flow_data {
    struct cmap_node node;
    ovs_u128 ufid;
    struct rte_flow *rte_flow;
    bool actions_offloaded;
    struct dpif_flow_stats stats;
};
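/* 'actions_offloaded' distinguishes full offload (match and actions both in
 * hardware) from partial offload (match only, via a MARK+RSS rule); flow_get
 * below reports the dp_layer as "dpdk" or "ovs" accordingly. */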
/* Find rte_flow with @ufid. */
static struct ufid_to_rte_flow_data *
ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
{
    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_to_rte_flow_data *data;

    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
        if (ovs_u128_equals(*ufid, data->ufid)) {
            return data;
        }
    }

    return NULL;
}
static inline struct ufid_to_rte_flow_data *
ufid_to_rte_flow_associate(const ovs_u128 *ufid,
                           struct rte_flow *rte_flow, bool actions_offloaded)
{
    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
    struct ufid_to_rte_flow_data *data_prev;

    /*
     * We should not simply overwrite an existing rte flow.
     * We should have deleted it first before re-adding it.
     * Thus, if the following assert triggers, something is wrong:
     * the rte_flow is not destroyed.
     */
    data_prev = ufid_to_rte_flow_data_find(ufid);
    if (data_prev) {
        ovs_assert(data_prev->rte_flow == NULL);
    }

    data->ufid = *ufid;
    data->rte_flow = rte_flow;
    data->actions_offloaded = actions_offloaded;

    cmap_insert(&ufid_to_rte_flow,
                CONST_CAST(struct cmap_node *, &data->node), hash);
    return data;
}
static void
ufid_to_rte_flow_disassociate(const ovs_u128 *ufid)
{
    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_to_rte_flow_data *data;

    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
        if (ovs_u128_equals(*ufid, data->ufid)) {
            cmap_remove(&ufid_to_rte_flow,
                        CONST_CAST(struct cmap_node *, &data->node), hash);
            ovsrcu_postpone(free, data);
            return;
        }
    }

    VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow",
              UUID_ARGS((struct uuid *) ufid));
}
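/* The node is removed from the cmap immediately, but the memory is freed
 * only after an RCU grace period, since a concurrent reader may still be
 * traversing it. */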
/*
 * To avoid individual xrealloc calls for each new element, a 'current_max'
 * is used to keep track of the currently allocated number of elements.
 * Starts at 8 and doubles on each xrealloc call.
 */
struct flow_patterns {
    struct rte_flow_item *items;
    int cnt;
    int current_max;
};

struct flow_actions {
    struct rte_flow_action *actions;
    int cnt;
    int current_max;
};
static void
dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
{
    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
                  attr->ingress  ? "ingress " : "",
                  attr->egress   ? "egress " : "", attr->priority, attr->group,
                  attr->transfer ? "transfer " : "");
}
/* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
 * 'testpmd command'-like format. */
#define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \
    if (is_all_ones(&mask, sizeof mask)) { \
        ds_put_format(s, field " is " fmt " ", spec_pri); \
    } else if (!is_all_zeros(&mask, sizeof mask)) { \
        ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \
                      spec_pri, mask_pri); \
    }
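/* For example (illustrative values): a fully masked source MAC prints
 * "src is 00:11:22:33:44:55 ", a partial mask prints
 * "src spec 00:11:22:33:44:55 src mask ff:ff:ff:00:00:00 ", and an
 * all-zero mask prints nothing. */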
static void
dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
{
    if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
        const struct rte_flow_item_eth *eth_spec = item->spec;
        const struct rte_flow_item_eth *eth_mask = item->mask;

        ds_put_cstr(s, "eth ");
        if (eth_spec) {
            if (!eth_mask) {
                eth_mask = &rte_flow_item_eth_mask;
            }
            DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT,
                              ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
                              ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes));
            DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT,
                              ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
                              ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes));
            DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
                              ntohs(eth_spec->type),
                              ntohs(eth_mask->type));
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
        const struct rte_flow_item_vlan *vlan_spec = item->spec;
        const struct rte_flow_item_vlan *vlan_mask = item->mask;

        ds_put_cstr(s, "vlan ");
        if (vlan_spec) {
            if (!vlan_mask) {
                vlan_mask = &rte_flow_item_vlan_mask;
            }
            DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type",
                              "0x%"PRIx16, ntohs(vlan_spec->inner_type),
                              ntohs(vlan_mask->inner_type));
            DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16,
                              ntohs(vlan_spec->tci), ntohs(vlan_mask->tci));
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
        const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
        const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;

        ds_put_cstr(s, "ipv4 ");
        if (ipv4_spec) {
            if (!ipv4_mask) {
                ipv4_mask = &rte_flow_item_ipv4_mask;
            }
            DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT,
                              IP_ARGS(ipv4_spec->hdr.src_addr),
                              IP_ARGS(ipv4_mask->hdr.src_addr));
            DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT,
                              IP_ARGS(ipv4_spec->hdr.dst_addr),
                              IP_ARGS(ipv4_mask->hdr.dst_addr));
            DUMP_PATTERN_ITEM(ipv4_mask->hdr.next_proto_id, "proto",
                              "0x%"PRIx8, ipv4_spec->hdr.next_proto_id,
                              ipv4_mask->hdr.next_proto_id);
            DUMP_PATTERN_ITEM(ipv4_mask->hdr.type_of_service, "tos",
                              "0x%"PRIx8, ipv4_spec->hdr.type_of_service,
                              ipv4_mask->hdr.type_of_service);
            DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, "ttl",
                              "0x%"PRIx8, ipv4_spec->hdr.time_to_live,
                              ipv4_mask->hdr.time_to_live);
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
        const struct rte_flow_item_udp *udp_spec = item->spec;
        const struct rte_flow_item_udp *udp_mask = item->mask;

        ds_put_cstr(s, "udp ");
        if (udp_spec) {
            if (!udp_mask) {
                udp_mask = &rte_flow_item_udp_mask;
            }
            DUMP_PATTERN_ITEM(udp_mask->hdr.src_port, "src", "%"PRIu16,
                              ntohs(udp_spec->hdr.src_port),
                              ntohs(udp_mask->hdr.src_port));
            DUMP_PATTERN_ITEM(udp_mask->hdr.dst_port, "dst", "%"PRIu16,
                              ntohs(udp_spec->hdr.dst_port),
                              ntohs(udp_mask->hdr.dst_port));
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) {
        const struct rte_flow_item_sctp *sctp_spec = item->spec;
        const struct rte_flow_item_sctp *sctp_mask = item->mask;

        ds_put_cstr(s, "sctp ");
        if (sctp_spec) {
            if (!sctp_mask) {
                sctp_mask = &rte_flow_item_sctp_mask;
            }
            DUMP_PATTERN_ITEM(sctp_mask->hdr.src_port, "src", "%"PRIu16,
                              ntohs(sctp_spec->hdr.src_port),
                              ntohs(sctp_mask->hdr.src_port));
            DUMP_PATTERN_ITEM(sctp_mask->hdr.dst_port, "dst", "%"PRIu16,
                              ntohs(sctp_spec->hdr.dst_port),
                              ntohs(sctp_mask->hdr.dst_port));
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) {
        const struct rte_flow_item_icmp *icmp_spec = item->spec;
        const struct rte_flow_item_icmp *icmp_mask = item->mask;

        ds_put_cstr(s, "icmp ");
        if (icmp_spec) {
            if (!icmp_mask) {
                icmp_mask = &rte_flow_item_icmp_mask;
            }
            DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_type, "icmp_type",
                              "%"PRIu8, icmp_spec->hdr.icmp_type,
                              icmp_mask->hdr.icmp_type);
            DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_code, "icmp_code",
                              "%"PRIu8, icmp_spec->hdr.icmp_code,
                              icmp_mask->hdr.icmp_code);
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
        const struct rte_flow_item_tcp *tcp_spec = item->spec;
        const struct rte_flow_item_tcp *tcp_mask = item->mask;

        ds_put_cstr(s, "tcp ");
        if (tcp_spec) {
            if (!tcp_mask) {
                tcp_mask = &rte_flow_item_tcp_mask;
            }
            DUMP_PATTERN_ITEM(tcp_mask->hdr.src_port, "src", "%"PRIu16,
                              ntohs(tcp_spec->hdr.src_port),
                              ntohs(tcp_mask->hdr.src_port));
            DUMP_PATTERN_ITEM(tcp_mask->hdr.dst_port, "dst", "%"PRIu16,
                              ntohs(tcp_spec->hdr.dst_port),
                              ntohs(tcp_mask->hdr.dst_port));
            DUMP_PATTERN_ITEM(tcp_mask->hdr.tcp_flags, "flags", "0x%"PRIx8,
                              tcp_spec->hdr.tcp_flags,
                              tcp_mask->hdr.tcp_flags);
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6) {
        const struct rte_flow_item_ipv6 *ipv6_spec = item->spec;
        const struct rte_flow_item_ipv6 *ipv6_mask = item->mask;

        char addr_str[INET6_ADDRSTRLEN];
        char mask_str[INET6_ADDRSTRLEN];
        struct in6_addr addr, mask;

        ds_put_cstr(s, "ipv6 ");
        if (ipv6_spec) {
            if (!ipv6_mask) {
                ipv6_mask = &rte_flow_item_ipv6_mask;
            }
            memcpy(&addr, ipv6_spec->hdr.src_addr, sizeof addr);
            memcpy(&mask, ipv6_mask->hdr.src_addr, sizeof mask);
            ipv6_string_mapped(addr_str, &addr);
            ipv6_string_mapped(mask_str, &mask);
            DUMP_PATTERN_ITEM(mask, "src", "%s", addr_str, mask_str);

            memcpy(&addr, ipv6_spec->hdr.dst_addr, sizeof addr);
            memcpy(&mask, ipv6_mask->hdr.dst_addr, sizeof mask);
            ipv6_string_mapped(addr_str, &addr);
            ipv6_string_mapped(mask_str, &mask);
            DUMP_PATTERN_ITEM(mask, "dst", "%s", addr_str, mask_str);

            DUMP_PATTERN_ITEM(ipv6_mask->hdr.proto, "proto", "%"PRIu8,
                              ipv6_spec->hdr.proto, ipv6_mask->hdr.proto);
            DUMP_PATTERN_ITEM(ipv6_mask->hdr.vtc_flow, "tc", "0x%"PRIx32,
                              ntohl(ipv6_spec->hdr.vtc_flow),
                              ntohl(ipv6_mask->hdr.vtc_flow));
            DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, "hop", "%"PRIu8,
                              ipv6_spec->hdr.hop_limits,
                              ipv6_mask->hdr.hop_limits);
        }
        ds_put_cstr(s, "/ ");
    } else {
        ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
    }
}
static void
dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
{
    const struct rte_flow_item_eth *eth = NULL;
    const struct rte_flow_item_ipv4 *ipv4 = NULL;
    const struct rte_flow_item_ipv6 *ipv6 = NULL;
    const struct rte_flow_item_udp *udp = NULL;
    const struct rte_flow_item_vxlan *vxlan = NULL;

    for (; items && items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
        if (items->type == RTE_FLOW_ITEM_TYPE_ETH) {
            eth = items->spec;
        } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV4) {
            ipv4 = items->spec;
        } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV6) {
            ipv6 = items->spec;
        } else if (items->type == RTE_FLOW_ITEM_TYPE_UDP) {
            udp = items->spec;
        } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
            vxlan = items->spec;
        }
    }

    ds_put_format(s, "set vxlan ip-version %s ",
                  ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
    if (vxlan) {
        ds_put_format(s, "vni %"PRIu32" ",
                      ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
    }
    if (udp) {
        ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
                      ntohs(udp->hdr.src_port), ntohs(udp->hdr.dst_port));
    }
    if (ipv4) {
        ds_put_format(s, "ip-src "IP_FMT" ip-dst "IP_FMT" ",
                      IP_ARGS(ipv4->hdr.src_addr),
                      IP_ARGS(ipv4->hdr.dst_addr));
    }
    if (ipv6) {
        struct in6_addr addr;

        ds_put_cstr(s, "ip-src ");
        memcpy(&addr, ipv6->hdr.src_addr, sizeof addr);
        ipv6_format_mapped(&addr, s);
        ds_put_cstr(s, " ip-dst ");
        memcpy(&addr, ipv6->hdr.dst_addr, sizeof addr);
        ipv6_format_mapped(&addr, s);
        ds_put_cstr(s, " ");
    }
    if (eth) {
        ds_put_format(s, "eth-src "ETH_ADDR_FMT" eth-dst "ETH_ADDR_FMT,
                      ETH_ADDR_BYTES_ARGS(eth->src.addr_bytes),
                      ETH_ADDR_BYTES_ARGS(eth->dst.addr_bytes));
    }
}
static void
dump_flow_action(struct ds *s, struct ds *s_extra,
                 const struct rte_flow_action *actions)
{
    if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
        const struct rte_flow_action_mark *mark = actions->conf;

        ds_put_cstr(s, "mark ");
        if (mark) {
            ds_put_format(s, "id %d ", mark->id);
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
        ds_put_cstr(s, "rss / ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
        ds_put_cstr(s, "count / ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
        const struct rte_flow_action_port_id *port_id = actions->conf;

        ds_put_cstr(s, "port_id ");
        if (port_id) {
            ds_put_format(s, "original %d id %d ",
                          port_id->original, port_id->id);
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
        ds_put_cstr(s, "drop / ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ||
               actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) {
        const struct rte_flow_action_set_mac *set_mac = actions->conf;

        char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST
                       ? "dst" : "src";

        ds_put_format(s, "set_mac_%s ", dirstr);
        if (set_mac) {
            ds_put_format(s, "mac_addr "ETH_ADDR_FMT" ",
                          ETH_ADDR_BYTES_ARGS(set_mac->mac_addr));
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ||
               actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) {
        const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf;
        char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
                       ? "dst" : "src";

        ds_put_format(s, "set_ipv4_%s ", dirstr);
        if (set_ipv4) {
            ds_put_format(s, "ipv4_addr "IP_FMT" ",
                          IP_ARGS(set_ipv4->ipv4_addr));
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) {
        const struct rte_flow_action_set_ttl *set_ttl = actions->conf;

        ds_put_cstr(s, "set_ttl ");
        if (set_ttl) {
            ds_put_format(s, "ttl_value %d ", set_ttl->ttl_value);
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ||
               actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) {
        const struct rte_flow_action_set_tp *set_tp = actions->conf;
        char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST
                       ? "dst" : "src";

        ds_put_format(s, "set_tp_%s ", dirstr);
        if (set_tp) {
            ds_put_format(s, "port %"PRIu16" ", ntohs(set_tp->port));
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) {
        const struct rte_flow_action_of_push_vlan *of_push_vlan =
            actions->conf;

        ds_put_cstr(s, "of_push_vlan ");
        if (of_push_vlan) {
            ds_put_format(s, "ethertype 0x%"PRIx16" ",
                          ntohs(of_push_vlan->ethertype));
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
        const struct rte_flow_action_of_set_vlan_pcp *of_set_vlan_pcp =
            actions->conf;

        ds_put_cstr(s, "of_set_vlan_pcp ");
        if (of_set_vlan_pcp) {
            ds_put_format(s, "vlan_pcp %"PRIu8" ", of_set_vlan_pcp->vlan_pcp);
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
        const struct rte_flow_action_of_set_vlan_vid *of_set_vlan_vid =
            actions->conf;

        ds_put_cstr(s, "of_set_vlan_vid ");
        if (of_set_vlan_vid) {
            ds_put_format(s, "vlan_vid %"PRIu16" ",
                          ntohs(of_set_vlan_vid->vlan_vid));
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) {
        ds_put_cstr(s, "of_pop_vlan / ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ||
               actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) {
        const struct rte_flow_action_set_ipv6 *set_ipv6 = actions->conf;

        char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
                       ? "dst" : "src";

        ds_put_format(s, "set_ipv6_%s ", dirstr);
        if (set_ipv6) {
            ds_put_cstr(s, "ipv6_addr ");
            ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s);
            ds_put_cstr(s, " ");
        }
        ds_put_cstr(s, "/ ");
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
        const struct rte_flow_action_raw_encap *raw_encap = actions->conf;

        ds_put_cstr(s, "raw_encap index 0 / ");
        if (raw_encap) {
            ds_put_format(s_extra, "Raw-encap size=%"PRIuSIZE" set raw_encap "
                          "0 raw pattern is ", raw_encap->size);
            for (size_t i = 0; i < raw_encap->size; i++) {
                ds_put_format(s_extra, "%02x", raw_encap->data[i]);
            }
            ds_put_cstr(s_extra, " / end_set;");
        }
    } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
        const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
        const struct rte_flow_item *items = vxlan_encap->definition;

        ds_put_cstr(s, "vxlan_encap / ");
        dump_vxlan_encap(s_extra, items);
        ds_put_cstr(s_extra, ";");
    } else {
        ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
    }
}
static struct ds *
dump_flow(struct ds *s, struct ds *s_extra,
          const struct rte_flow_attr *attr,
          const struct rte_flow_item *items,
          const struct rte_flow_action *actions)
{
    if (attr) {
        dump_flow_attr(s, attr);
    }
    ds_put_cstr(s, "pattern ");
    while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
        dump_flow_pattern(s, items++);
    }
    ds_put_cstr(s, "end actions ");
    while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
        dump_flow_action(s, s_extra, actions++);
    }
    ds_put_cstr(s, "end");
    return s;
}
static struct rte_flow *
netdev_offload_dpdk_flow_create(struct netdev *netdev,
                                const struct rte_flow_attr *attr,
                                const struct rte_flow_item *items,
                                const struct rte_flow_action *actions,
                                struct rte_flow_error *error)
{
    struct ds s_extra = DS_EMPTY_INITIALIZER;
    struct ds s = DS_EMPTY_INITIALIZER;
    struct rte_flow *flow;
    char *extra_str;

    flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
    if (flow) {
        if (!VLOG_DROP_DBG(&rl)) {
            dump_flow(&s, &s_extra, attr, items, actions);
            extra_str = ds_cstr(&s_extra);
            VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
                        netdev_get_name(netdev), (intptr_t) flow, extra_str,
                        netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
        }
    } else {
        enum vlog_level level = VLL_WARN;

        if (error->type == RTE_FLOW_ERROR_TYPE_ACTION) {
            level = VLL_DBG;
        }
        VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
                netdev_get_name(netdev), error->type, error->message);
        if (!vlog_should_drop(&this_module, level, &rl)) {
            dump_flow(&s, &s_extra, attr, items, actions);
            extra_str = ds_cstr(&s_extra);
            VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
                    netdev_get_name(netdev), extra_str,
                    netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
        }
    }

    ds_destroy(&s);
    ds_destroy(&s_extra);
    return flow;
}
static void
add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
                 const void *spec, const void *mask)
{
    int cnt = patterns->cnt;

    if (cnt == 0) {
        patterns->current_max = 8;
        patterns->items = xcalloc(patterns->current_max,
                                  sizeof *patterns->items);
    } else if (cnt == patterns->current_max) {
        patterns->current_max *= 2;
        patterns->items = xrealloc(patterns->items, patterns->current_max *
                                   sizeof *patterns->items);
    }

    patterns->items[cnt].type = type;
    patterns->items[cnt].spec = spec;
    patterns->items[cnt].mask = mask;
    patterns->items[cnt].last = NULL;
    patterns->cnt++;
}
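/* Growth is amortized: the array starts at 8 entries and doubles
 * (8, 16, 32, ...) whenever 'cnt' reaches 'current_max', so n additions cost
 * O(n) element copies overall instead of one xrealloc() per item. */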
static void
add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
                const void *conf)
{
    int cnt = actions->cnt;

    if (cnt == 0) {
        actions->current_max = 8;
        actions->actions = xcalloc(actions->current_max,
                                   sizeof *actions->actions);
    } else if (cnt == actions->current_max) {
        actions->current_max *= 2;
        actions->actions = xrealloc(actions->actions, actions->current_max *
                                    sizeof *actions->actions);
    }

    actions->actions[cnt].type = type;
    actions->actions[cnt].conf = conf;
    actions->cnt++;
}
static void
free_flow_patterns(struct flow_patterns *patterns)
{
    int i;

    for (i = 0; i < patterns->cnt; i++) {
        if (patterns->items[i].spec) {
            free(CONST_CAST(void *, patterns->items[i].spec));
        }
        if (patterns->items[i].mask) {
            free(CONST_CAST(void *, patterns->items[i].mask));
        }
    }
    free(patterns->items);
    patterns->items = NULL;
    patterns->cnt = 0;
}
static void
free_flow_actions(struct flow_actions *actions)
{
    int i;

    for (i = 0; i < actions->cnt; i++) {
        if (actions->actions[i].conf) {
            free(CONST_CAST(void *, actions->actions[i].conf));
        }
    }
    free(actions->actions);
    actions->actions = NULL;
    actions->cnt = 0;
}
static int
parse_flow_match(struct flow_patterns *patterns,
                 struct match *match)
{
    uint8_t *next_proto_mask = NULL;
    struct flow *consumed_masks;
    uint8_t proto = 0;

    consumed_masks = &match->wc.masks;

    if (!flow_tnl_dst_is_set(&match->flow.tunnel)) {
        memset(&consumed_masks->tunnel, 0, sizeof consumed_masks->tunnel);
    }

    memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port);
    /* recirc id must be zero. */
    if (match->wc.masks.recirc_id & match->flow.recirc_id) {
        return -1;
    }
    consumed_masks->recirc_id = 0;
    consumed_masks->packet_type = 0;

    /* Eth */
    if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow)
        && eth_addr_is_zero(match->wc.masks.dl_dst)
        && eth_addr_is_zero(match->wc.masks.dl_src)) {
        /*
         * This is a temporary work around to fix ethernet pattern for partial
         * hardware offload for X710 devices. This fix will be reverted once
         * the issue is fixed within the i40e PMD driver.
         */
        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);

        memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
        memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
        consumed_masks->dl_type = 0;
    } else if (match->wc.masks.dl_type ||
               !eth_addr_is_zero(match->wc.masks.dl_src) ||
               !eth_addr_is_zero(match->wc.masks.dl_dst)) {
        struct rte_flow_item_eth *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        memcpy(&spec->dst, &match->flow.dl_dst, sizeof spec->dst);
        memcpy(&spec->src, &match->flow.dl_src, sizeof spec->src);
        spec->type = match->flow.dl_type;

        memcpy(&mask->dst, &match->wc.masks.dl_dst, sizeof mask->dst);
        memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src);
        mask->type = match->wc.masks.dl_type;

        memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
        memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
        consumed_masks->dl_type = 0;

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
    }

    /* VLAN */
    if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
        struct rte_flow_item_vlan *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
        mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);

        /* Match any protocols. */
        mask->inner_type = 0;

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
    }
    /* For untagged matching match->wc.masks.vlans[0].tci is 0xFFFF and
     * match->flow.vlans[0].tci is 0. Consuming is needed outside of the if
     * scope to handle that.
     */
    memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]);

    /* IP v4 */
    if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
        struct rte_flow_item_ipv4 *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        spec->hdr.type_of_service = match->flow.nw_tos;
        spec->hdr.time_to_live    = match->flow.nw_ttl;
        spec->hdr.next_proto_id   = match->flow.nw_proto;
        spec->hdr.src_addr        = match->flow.nw_src;
        spec->hdr.dst_addr        = match->flow.nw_dst;

        mask->hdr.type_of_service = match->wc.masks.nw_tos;
        mask->hdr.time_to_live    = match->wc.masks.nw_ttl;
        mask->hdr.next_proto_id   = match->wc.masks.nw_proto;
        mask->hdr.src_addr        = match->wc.masks.nw_src;
        mask->hdr.dst_addr        = match->wc.masks.nw_dst;

        consumed_masks->nw_tos = 0;
        consumed_masks->nw_ttl = 0;
        consumed_masks->nw_proto = 0;
        consumed_masks->nw_src = 0;
        consumed_masks->nw_dst = 0;

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask);

        /* Save proto for L4 protocol setup. */
        proto = spec->hdr.next_proto_id &
                mask->hdr.next_proto_id;
        next_proto_mask = &mask->hdr.next_proto_id;
    }
    /* If fragmented, then don't HW accelerate - for now. */
    if (match->wc.masks.nw_frag & match->flow.nw_frag) {
        return -1;
    }
    consumed_masks->nw_frag = 0;

    /* IP v6 */
    if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {
        struct rte_flow_item_ipv6 *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        spec->hdr.proto = match->flow.nw_proto;
        spec->hdr.hop_limits = match->flow.nw_ttl;
        spec->hdr.vtc_flow =
            htonl((uint32_t) match->flow.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
        memcpy(spec->hdr.src_addr, &match->flow.ipv6_src,
               sizeof spec->hdr.src_addr);
        memcpy(spec->hdr.dst_addr, &match->flow.ipv6_dst,
               sizeof spec->hdr.dst_addr);

        mask->hdr.proto = match->wc.masks.nw_proto;
        mask->hdr.hop_limits = match->wc.masks.nw_ttl;
        mask->hdr.vtc_flow =
            htonl((uint32_t) match->wc.masks.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
        memcpy(mask->hdr.src_addr, &match->wc.masks.ipv6_src,
               sizeof mask->hdr.src_addr);
        memcpy(mask->hdr.dst_addr, &match->wc.masks.ipv6_dst,
               sizeof mask->hdr.dst_addr);

        consumed_masks->nw_proto = 0;
        consumed_masks->nw_ttl = 0;
        consumed_masks->nw_tos = 0;
        memset(&consumed_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src);
        memset(&consumed_masks->ipv6_dst, 0, sizeof consumed_masks->ipv6_dst);

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV6, spec, mask);

        /* Save proto for L4 protocol setup. */
        proto = spec->hdr.proto & mask->hdr.proto;
        next_proto_mask = &mask->hdr.proto;
    }

    if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP  &&
        proto != IPPROTO_SCTP && proto != IPPROTO_TCP  &&
        (match->wc.masks.tp_src ||
         match->wc.masks.tp_dst ||
         match->wc.masks.tcp_flags)) {
        VLOG_DBG("L4 Protocol (%u) not supported", proto);
        return -1;
    }

    if (proto == IPPROTO_TCP) {
        struct rte_flow_item_tcp *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        spec->hdr.src_port  = match->flow.tp_src;
        spec->hdr.dst_port  = match->flow.tp_dst;
        spec->hdr.data_off  = ntohs(match->flow.tcp_flags) >> 8;
        spec->hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff;

        mask->hdr.src_port  = match->wc.masks.tp_src;
        mask->hdr.dst_port  = match->wc.masks.tp_dst;
        mask->hdr.data_off  = ntohs(match->wc.masks.tcp_flags) >> 8;
        mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;

        consumed_masks->tp_src = 0;
        consumed_masks->tp_dst = 0;
        consumed_masks->tcp_flags = 0;

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask);

        /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */
        if (next_proto_mask) {
            *next_proto_mask = 0;
        }
    } else if (proto == IPPROTO_UDP) {
        struct rte_flow_item_udp *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        spec->hdr.src_port = match->flow.tp_src;
        spec->hdr.dst_port = match->flow.tp_dst;

        mask->hdr.src_port = match->wc.masks.tp_src;
        mask->hdr.dst_port = match->wc.masks.tp_dst;

        consumed_masks->tp_src = 0;
        consumed_masks->tp_dst = 0;

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask);

        /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */
        if (next_proto_mask) {
            *next_proto_mask = 0;
        }
    } else if (proto == IPPROTO_SCTP) {
        struct rte_flow_item_sctp *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        spec->hdr.src_port = match->flow.tp_src;
        spec->hdr.dst_port = match->flow.tp_dst;

        mask->hdr.src_port = match->wc.masks.tp_src;
        mask->hdr.dst_port = match->wc.masks.tp_dst;

        consumed_masks->tp_src = 0;
        consumed_masks->tp_dst = 0;

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask);

        /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */
        if (next_proto_mask) {
            *next_proto_mask = 0;
        }
    } else if (proto == IPPROTO_ICMP) {
        struct rte_flow_item_icmp *spec, *mask;

        spec = xzalloc(sizeof *spec);
        mask = xzalloc(sizeof *mask);

        spec->hdr.icmp_type = (uint8_t) ntohs(match->flow.tp_src);
        spec->hdr.icmp_code = (uint8_t) ntohs(match->flow.tp_dst);

        mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src);
        mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst);

        consumed_masks->tp_src = 0;
        consumed_masks->tp_dst = 0;

        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask);

        /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */
        if (next_proto_mask) {
            *next_proto_mask = 0;
        }
    }

    add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL);

    if (!is_all_zeros(consumed_masks, sizeof *consumed_masks)) {
        return -1;
    }
    return 0;
}
static void
add_flow_mark_rss_actions(struct flow_actions *actions,
                          uint32_t flow_mark,
                          const struct netdev *netdev)
{
    struct rte_flow_action_mark *mark;
    struct action_rss_data {
        struct rte_flow_action_rss conf;
        uint16_t queue[0];
    } *rss_data;
    BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0);
    int i;

    mark = xzalloc(sizeof *mark);

    mark->id = flow_mark;
    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark);

    rss_data = xmalloc(sizeof *rss_data +
                       netdev_n_rxq(netdev) * sizeof rss_data->queue[0]);
    *rss_data = (struct action_rss_data) {
        .conf = (struct rte_flow_action_rss) {
            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
            .level = 0,
            .types = 0,
            .queue_num = netdev_n_rxq(netdev),
            .queue = rss_data->queue,
            .key_len = 0,
            .key = NULL
        },
    };

    /* Override queue array with default. */
    for (i = 0; i < netdev_n_rxq(netdev); i++) {
        rss_data->queue[i] = i;
    }

    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
}
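/* MARK+RSS implements partial offload: the NIC tags matching packets with
 * 'flow_mark' so dpif-netdev can skip its software classifier lookup, while
 * the RSS action keeps spreading packets across all Rx queues as if no rule
 * were installed. */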
static struct rte_flow *
netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
                             struct netdev *netdev,
                             uint32_t flow_mark)
{
    struct flow_actions actions = { .actions = NULL, .cnt = 0 };
    const struct rte_flow_attr flow_attr = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0
    };
    struct rte_flow_error error;
    struct rte_flow *flow;

    add_flow_mark_rss_actions(&actions, flow_mark, netdev);

    flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr,
                                           patterns->items,
                                           actions.actions, &error);

    free_flow_actions(&actions);
    return flow;
}
static void
add_count_action(struct flow_actions *actions)
{
    struct rte_flow_action_count *count = xzalloc(sizeof *count);

    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
}
static int
add_port_id_action(struct flow_actions *actions,
                   struct netdev *outdev)
{
    struct rte_flow_action_port_id *port_id;
    int outdev_id;

    outdev_id = netdev_dpdk_get_port_id(outdev);
    if (outdev_id < 0) {
        return -1;
    }

    port_id = xzalloc(sizeof *port_id);
    port_id->id = outdev_id;
    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
    return 0;
}
static int
add_output_action(struct netdev *netdev,
                  struct flow_actions *actions,
                  const struct nlattr *nla)
{
    struct netdev *outdev;
    odp_port_t port;
    int ret = 0;

    port = nl_attr_get_odp_port(nla);
    outdev = netdev_ports_get(port, netdev->dpif_type);
    if (outdev == NULL) {
        VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port);
        return -1;
    }
    if (!netdev_flow_api_equals(netdev, outdev) ||
        add_port_id_action(actions, outdev)) {
        VLOG_DBG_RL(&rl, "%s: Output to port \'%s\' cannot be offloaded.",
                    netdev_get_name(netdev), netdev_get_name(outdev));
        ret = -1;
    }
    netdev_close(outdev);
    return ret;
}
static int
add_set_flow_action__(struct flow_actions *actions,
                      const void *value, void *mask,
                      const size_t size, const int attr)
{
    void *spec;

    if (mask) {
        /* DPDK does not support partially masked set actions. In such
         * case, fail the offload.
         */
        if (is_all_zeros(mask, size)) {
            return 0;
        }
        if (!is_all_ones(mask, size)) {
            VLOG_DBG_RL(&rl, "Partial mask is not supported");
            return -1;
        }
    }

    spec = xzalloc(size);
    memcpy(spec, value, size);
    add_flow_action(actions, attr, spec);

    /* Clear used mask for later checking. */
    if (mask) {
        memset(mask, 0, size);
    }
    return 0;
}
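/* For example, a set action rewriting an IPv4 source with mask
 * 255.255.255.255 is offloaded as-is, an all-zero mask is silently skipped
 * as a no-op, and anything in between (e.g. 255.255.255.0) fails the
 * offload. */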
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
                  MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
                  MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
                  MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
                  MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_src));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
                  MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_dst));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
                  MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_hlimit));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
                  MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
                  MEMBER_SIZEOF(struct ovs_key_tcp, tcp_dst));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
                  MEMBER_SIZEOF(struct ovs_key_udp, udp_src));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
                  MEMBER_SIZEOF(struct ovs_key_udp, udp_dst));
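/* These asserts guarantee that each rte_flow 'set' action configuration is
 * byte-compatible with the corresponding OVS key field, because
 * add_set_flow_action__() copies 'size' bytes straight from the netlink key
 * into the rte_flow conf without any per-field translation. */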
static int
parse_set_actions(struct flow_actions *actions,
                  const struct nlattr *set_actions,
                  const size_t set_actions_len,
                  bool masked)
{
    const struct nlattr *sa;
    unsigned int sleft;

#define add_set_flow_action(field, type)                                     \
    if (add_set_flow_action__(actions, &key->field,                          \
                              mask ? CONST_CAST(void *, &mask->field) : NULL,\
                              sizeof key->field, type)) {                    \
        return -1;                                                           \
    }

    NL_ATTR_FOR_EACH_UNSAFE (sa, sleft, set_actions, set_actions_len) {
        if (nl_attr_type(sa) == OVS_KEY_ATTR_ETHERNET) {
            const struct ovs_key_ethernet *key = nl_attr_get(sa);
            const struct ovs_key_ethernet *mask = masked ? key + 1 : NULL;

            add_set_flow_action(eth_src, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC);
            add_set_flow_action(eth_dst, RTE_FLOW_ACTION_TYPE_SET_MAC_DST);

            if (mask && !is_all_zeros(mask, sizeof *mask)) {
                VLOG_DBG_RL(&rl, "Unsupported ETHERNET set action");
                return -1;
            }
        } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV4) {
            const struct ovs_key_ipv4 *key = nl_attr_get(sa);
            const struct ovs_key_ipv4 *mask = masked ? key + 1 : NULL;

            add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC);
            add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST);
            add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL);

            if (mask && !is_all_zeros(mask, sizeof *mask)) {
                VLOG_DBG_RL(&rl, "Unsupported IPv4 set action");
                return -1;
            }
        } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV6) {
            const struct ovs_key_ipv6 *key = nl_attr_get(sa);
            const struct ovs_key_ipv6 *mask = masked ? key + 1 : NULL;

            add_set_flow_action(ipv6_src, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC);
            add_set_flow_action(ipv6_dst, RTE_FLOW_ACTION_TYPE_SET_IPV6_DST);
            add_set_flow_action(ipv6_hlimit, RTE_FLOW_ACTION_TYPE_SET_TTL);

            if (mask && !is_all_zeros(mask, sizeof *mask)) {
                VLOG_DBG_RL(&rl, "Unsupported IPv6 set action");
                return -1;
            }
        } else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) {
            const struct ovs_key_tcp *key = nl_attr_get(sa);
            const struct ovs_key_tcp *mask = masked ? key + 1 : NULL;

            add_set_flow_action(tcp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
            add_set_flow_action(tcp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);

            if (mask && !is_all_zeros(mask, sizeof *mask)) {
                VLOG_DBG_RL(&rl, "Unsupported TCP set action");
                return -1;
            }
        } else if (nl_attr_type(sa) == OVS_KEY_ATTR_UDP) {
            const struct ovs_key_udp *key = nl_attr_get(sa);
            const struct ovs_key_udp *mask = masked ? key + 1 : NULL;

            add_set_flow_action(udp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
            add_set_flow_action(udp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);

            if (mask && !is_all_zeros(mask, sizeof *mask)) {
                VLOG_DBG_RL(&rl, "Unsupported UDP set action");
                return -1;
            }
        } else {
            VLOG_DBG_RL(&rl,
                        "Unsupported set action type %d", nl_attr_type(sa));
            return -1;
        }
    }

    return 0;
}
/* Maximum number of items in struct rte_flow_action_vxlan_encap.
 * ETH / IPv4(6) / UDP / VXLAN / END
 */
#define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
static int
add_vxlan_encap_action(struct flow_actions *actions,
                       const void *header)
{
    const struct eth_header *eth;
    const struct udp_header *udp;
    struct vxlan_data {
        struct rte_flow_action_vxlan_encap conf;
        struct rte_flow_item items[ACTION_VXLAN_ENCAP_ITEMS_NUM];
    } *vxlan_data;
    BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
    const void *vxlan;
    const void *l3;
    const void *l4;
    int field;

    vxlan_data = xzalloc(sizeof *vxlan_data);
    field = 0;

    eth = header;
    /* Ethernet */
    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
    vxlan_data->items[field].spec = eth;
    vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
    field++;

    l3 = eth + 1;
    /* IP */
    if (eth->eth_type == htons(ETH_TYPE_IP)) {
        /* IPv4 */
        const struct ip_header *ip = l3;

        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
        vxlan_data->items[field].spec = ip;
        vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;

        if (ip->ip_proto != IPPROTO_UDP) {
            goto err;
        }
        l4 = (ip + 1);
    } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
        /* IPv6 */
        const struct ovs_16aligned_ip6_hdr *ip6 = l3;

        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
        vxlan_data->items[field].spec = ip6;
        vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;

        if (ip6->ip6_nxt != IPPROTO_UDP) {
            goto err;
        }
        l4 = (ip6 + 1);
    } else {
        goto err;
    }
    field++;

    udp = l4;
    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
    vxlan_data->items[field].spec = udp;
    vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
    field++;

    vxlan = (udp + 1);
    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
    vxlan_data->items[field].spec = vxlan;
    vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
    field++;

    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;

    vxlan_data->conf.definition = vxlan_data->items;

    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);

    return 0;
err:
    free(vxlan_data);
    return -1;
}
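/* If the header cannot be expressed as ETH / IPv4|IPv6 / UDP / VXLAN (e.g.
 * a non-UDP L4), this function fails and parse_clone_actions() falls back
 * to a generic RAW_ENCAP of the prebuilt tunnel header instead. */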
static int
parse_vlan_push_action(struct flow_actions *actions,
                       const struct ovs_action_push_vlan *vlan_push)
{
    struct rte_flow_action_of_push_vlan *rte_push_vlan;
    struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp;
    struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid;

    rte_push_vlan = xzalloc(sizeof *rte_push_vlan);
    rte_push_vlan->ethertype = vlan_push->vlan_tpid;
    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN,
                    rte_push_vlan);

    rte_vlan_pcp = xzalloc(sizeof *rte_vlan_pcp);
    rte_vlan_pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci);
    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
                    rte_vlan_pcp);

    rte_vlan_vid = xzalloc(sizeof *rte_vlan_vid);
    rte_vlan_vid->vlan_vid = htons(vlan_tci_to_vid(vlan_push->vlan_tci));
    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
                    rte_vlan_vid);
    return 0;
}
static int
parse_clone_actions(struct netdev *netdev,
                    struct flow_actions *actions,
                    const struct nlattr *clone_actions,
                    const size_t clone_actions_len)
{
    const struct nlattr *ca;
    unsigned int cleft;

    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
        int clone_type = nl_attr_type(ca);

        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
            struct rte_flow_action_raw_encap *raw_encap;

            if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
                !add_vxlan_encap_action(actions, tnl_push->header)) {
                continue;
            }

            raw_encap = xzalloc(sizeof *raw_encap);
            raw_encap->data = (uint8_t *) tnl_push->header;
            raw_encap->preserve = NULL;
            raw_encap->size = tnl_push->header_len;

            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
                            raw_encap);
        } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
            if (add_output_action(netdev, actions, ca)) {
                return -1;
            }
        } else {
            VLOG_DBG_RL(&rl,
                        "Unsupported nested action inside clone(), "
                        "action type: %d", clone_type);
            return -1;
        }
    }
    return 0;
}
static int
parse_flow_actions(struct netdev *netdev,
                   struct flow_actions *actions,
                   struct nlattr *nl_actions,
                   size_t nl_actions_len)
{
    struct nlattr *nla;
    size_t left;

    add_count_action(actions);
    NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
            if (add_output_action(netdev, actions, nla)) {
                return -1;
            }
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_DROP, NULL);
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET ||
                   nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
            const struct nlattr *set_actions = nl_attr_get(nla);
            const size_t set_actions_len = nl_attr_get_size(nla);
            bool masked = nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED;

            if (parse_set_actions(actions, set_actions, set_actions_len,
                                  masked)) {
                return -1;
            }
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) {
            const struct ovs_action_push_vlan *vlan = nl_attr_get(nla);

            if (parse_vlan_push_action(actions, vlan)) {
                return -1;
            }
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) {
            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL);
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE &&
                   left <= NLA_ALIGN(nla->nla_len)) {
            /* The clone action must be the last action. */
            const struct nlattr *clone_actions = nl_attr_get(nla);
            size_t clone_actions_len = nl_attr_get_size(nla);

            if (parse_clone_actions(netdev, actions, clone_actions,
                                    clone_actions_len)) {
                return -1;
            }
        } else {
            VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
            return -1;
        }
    }

    if (nl_actions_len == 0) {
        VLOG_DBG_RL(&rl, "No actions provided");
        return -1;
    }

    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
    return 0;
}
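/* Note that a COUNT action is always prepended, even when the datapath did
 * not request one, so that per-rule statistics can later be retrieved with
 * an rte_flow count query (see netdev_offload_dpdk_flow_get()). */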
static struct rte_flow *
netdev_offload_dpdk_actions(struct netdev *netdev,
                            struct flow_patterns *patterns,
                            struct nlattr *nl_actions,
                            size_t actions_len)
{
    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
    struct flow_actions actions = { .actions = NULL, .cnt = 0 };
    struct rte_flow *flow = NULL;
    struct rte_flow_error error;
    int ret;

    ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
    if (ret) {
        goto out;
    }
    flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr,
                                           patterns->items,
                                           actions.actions, &error);
out:
    free_flow_actions(&actions);
    return flow;
}
static struct ufid_to_rte_flow_data *
netdev_offload_dpdk_add_flow(struct netdev *netdev,
                             struct match *match,
                             struct nlattr *nl_actions,
                             size_t actions_len,
                             const ovs_u128 *ufid,
                             struct offload_info *info)
{
    struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
    struct ufid_to_rte_flow_data *flows_data = NULL;
    bool actions_offloaded = true;
    struct rte_flow *flow;

    if (parse_flow_match(&patterns, match)) {
        VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported",
                    netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid));
        goto out;
    }

    flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
                                       actions_len);
    if (!flow) {
        /* If we failed to offload the rule actions, fall back to MARK+RSS
         * actions.
         */
        flow = netdev_offload_dpdk_mark_rss(&patterns, netdev,
                                            info->flow_mark);
        actions_offloaded = false;
    }

    if (!flow) {
        goto out;
    }
    flows_data = ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
    VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT,
             netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *) ufid));

out:
    free_flow_patterns(&patterns);
    return flows_data;
}
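/* Full offload is attempted first; only if the actions cannot be translated
 * does the flow degrade to a MARK+RSS (partial offload) rule.  The outcome
 * is recorded in 'actions_offloaded' for later reporting by flow_get(). */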
static int
netdev_offload_dpdk_destroy_flow(struct netdev *netdev,
                                 const ovs_u128 *ufid,
                                 struct rte_flow *rte_flow)
{
    struct rte_flow_error error;
    int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);

    if (ret == 0) {
        ufid_to_rte_flow_disassociate(ufid);
        VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR
                    " flow destroy %d ufid " UUID_FMT,
                    netdev_get_name(netdev), (intptr_t) rte_flow,
                    netdev_dpdk_get_port_id(netdev),
                    UUID_ARGS((struct uuid *) ufid));
    } else {
        VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT,
                 netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev),
                 UUID_ARGS((struct uuid *) ufid));
    }

    return ret;
}
static int
netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
                             struct nlattr *actions, size_t actions_len,
                             const ovs_u128 *ufid, struct offload_info *info,
                             struct dpif_flow_stats *stats)
{
    struct ufid_to_rte_flow_data *rte_flow_data;
    struct dpif_flow_stats old_stats;
    bool modification = false;
    int ret;

    /*
     * If an old rte_flow exists, it means it's a flow modification.
     * Here destroy the old rte flow first before adding a new one.
     * Keep the stats for the newly created rule.
     */
    rte_flow_data = ufid_to_rte_flow_data_find(ufid);
    if (rte_flow_data && rte_flow_data->rte_flow) {
        old_stats = rte_flow_data->stats;
        modification = true;
        ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
                                               rte_flow_data->rte_flow);
        if (ret < 0) {
            return ret;
        }
    }

    rte_flow_data = netdev_offload_dpdk_add_flow(netdev, match, actions,
                                                 actions_len, ufid, info);
    if (!rte_flow_data) {
        return -1;
    }
    if (modification) {
        rte_flow_data->stats = old_stats;
    }
    if (stats) {
        *stats = rte_flow_data->stats;
    }
    return 0;
}
static int
netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
                             struct dpif_flow_stats *stats)
{
    struct ufid_to_rte_flow_data *rte_flow_data;

    rte_flow_data = ufid_to_rte_flow_data_find(ufid);
    if (!rte_flow_data || !rte_flow_data->rte_flow) {
        return -1;
    }

    if (stats) {
        memset(stats, 0, sizeof *stats);
    }
    return netdev_offload_dpdk_destroy_flow(netdev, ufid,
                                            rte_flow_data->rte_flow);
}
static int
netdev_offload_dpdk_init_flow_api(struct netdev *netdev)
{
    return netdev_dpdk_flow_api_supported(netdev) ? 0 : EOPNOTSUPP;
}
static int
netdev_offload_dpdk_flow_get(struct netdev *netdev,
                             struct match *match OVS_UNUSED,
                             struct nlattr **actions OVS_UNUSED,
                             const ovs_u128 *ufid,
                             struct dpif_flow_stats *stats,
                             struct dpif_flow_attrs *attrs,
                             struct ofpbuf *buf OVS_UNUSED)
{
    struct rte_flow_query_count query = { .reset = 1 };
    struct ufid_to_rte_flow_data *rte_flow_data;
    struct rte_flow_error error;
    int ret = 0;

    rte_flow_data = ufid_to_rte_flow_data_find(ufid);
    if (!rte_flow_data || !rte_flow_data->rte_flow) {
        ret = -1;
        goto out;
    }

    attrs->offloaded = true;
    if (!rte_flow_data->actions_offloaded) {
        attrs->dp_layer = "ovs";
        memset(stats, 0, sizeof *stats);
        goto out;
    }
    attrs->dp_layer = "dpdk";
    ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
                                           &query, &error);
    if (ret) {
        VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p",
                    netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid),
                    rte_flow_data->rte_flow);
        goto out;
    }
    rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
    rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
    if (query.hits_set && query.hits) {
        rte_flow_data->stats.used = time_msec();
    }
    memcpy(stats, &rte_flow_data->stats, sizeof *stats);
out:
    attrs->dp_extra_info = NULL;
    return ret;
}
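/* Statistics are cumulative: the query is issued with 'reset = 1', so the
 * PMD clears its hardware counters on every read and the returned deltas
 * are accumulated into 'rte_flow_data->stats' above. */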
const struct netdev_flow_api netdev_offload_dpdk = {
    .type = "dpdk_flow_api",
    .flow_put = netdev_offload_dpdk_flow_put,
    .flow_del = netdev_offload_dpdk_flow_del,
    .init_flow_api = netdev_offload_dpdk_init_flow_api,
    .flow_get = netdev_offload_dpdk_flow_get,
};