netdev-offload-dpdk: Fix for broken ethernet matching HWOL for XL710 NIC.
1 /*
2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 * Copyright (c) 2019 Mellanox Technologies, Ltd.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 #include <config.h>
18
19 #include <sys/types.h>
20 #include <netinet/ip6.h>
21 #include <rte_flow.h>
22
23 #include "cmap.h"
24 #include "dpif-netdev.h"
25 #include "netdev-offload-provider.h"
26 #include "netdev-provider.h"
27 #include "openvswitch/match.h"
28 #include "openvswitch/vlog.h"
29 #include "packets.h"
30 #include "uuid.h"
31
32 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
33 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
34
35 /* Thread-safety
36 * =============
37 *
38  * The API below is NOT thread-safe in the following terms:
39 *
40 * - The caller must be sure that none of these functions will be called
41  *   simultaneously, even for different 'netdev's.
42 *
43 * - The caller must be sure that 'netdev' will not be destructed/deallocated.
44 *
45 * - The caller must be sure that 'netdev' configuration will not be changed.
46 * For example, simultaneous call of 'netdev_reconfigure()' for the same
47 * 'netdev' is forbidden.
48 *
49  * In the current implementation, all of the above restrictions can be met by
50  * taking the datapath 'port_mutex' in lib/dpif-netdev.c. */
51
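/*
 * Illustrative sketch (not part of the upstream file): a caller is expected
 * to serialize all calls into this module, e.g. under the datapath
 * 'port_mutex'.  The variable names and locking placement below are
 * assumptions for illustration only, roughly mirroring how lib/dpif-netdev.c
 * drives the generic offload API:
 *
 *     ovs_mutex_lock(&dp->port_mutex);
 *     ret = netdev_flow_put(netdev, &match, actions, actions_len,
 *                           &ufid, &info, &stats);
 *     ovs_mutex_unlock(&dp->port_mutex);
 */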
52 /*
53 * A mapping from ufid to dpdk rte_flow.
54 */
55 static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;
56
57 struct ufid_to_rte_flow_data {
58 struct cmap_node node;
59 ovs_u128 ufid;
60 struct rte_flow *rte_flow;
61 bool actions_offloaded;
62 struct dpif_flow_stats stats;
63 };
64
65 /* Find rte_flow with @ufid. */
66 static struct ufid_to_rte_flow_data *
67 ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
68 {
69 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
70 struct ufid_to_rte_flow_data *data;
71
72 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
73 if (ovs_u128_equals(*ufid, data->ufid)) {
74 return data;
75 }
76 }
77
78 return NULL;
79 }
80
81 static inline void
82 ufid_to_rte_flow_associate(const ovs_u128 *ufid,
83 struct rte_flow *rte_flow, bool actions_offloaded)
84 {
85 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
86 struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
87 struct ufid_to_rte_flow_data *data_prev;
88
89 /*
90 * We should not simply overwrite an existing rte flow.
91      * It should have been deleted before being re-added.
92      * Thus, if the following assert triggers, something is wrong:
93      * the rte_flow was not destroyed.
94 */
95 data_prev = ufid_to_rte_flow_data_find(ufid);
96 if (data_prev) {
97 ovs_assert(data_prev->rte_flow == NULL);
98 }
99
100 data->ufid = *ufid;
101 data->rte_flow = rte_flow;
102 data->actions_offloaded = actions_offloaded;
103
104 cmap_insert(&ufid_to_rte_flow,
105 CONST_CAST(struct cmap_node *, &data->node), hash);
106 }
107
108 static inline void
109 ufid_to_rte_flow_disassociate(const ovs_u128 *ufid)
110 {
111 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
112 struct ufid_to_rte_flow_data *data;
113
114 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
115 if (ovs_u128_equals(*ufid, data->ufid)) {
116 cmap_remove(&ufid_to_rte_flow,
117 CONST_CAST(struct cmap_node *, &data->node), hash);
118 ovsrcu_postpone(free, data);
119 return;
120 }
121 }
122
123 VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow",
124 UUID_ARGS((struct uuid *) ufid));
125 }
126
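/*
 * Illustrative sketch (not part of the upstream file): typical lifecycle of
 * an entry in the ufid map, using only the helpers defined above and the
 * destroy helper defined later in this file:
 *
 *     ufid_to_rte_flow_associate(&ufid, flow, actions_offloaded);
 *     ...
 *     data = ufid_to_rte_flow_data_find(&ufid);
 *     if (data && data->rte_flow) {
 *         netdev_offload_dpdk_destroy_flow(netdev, &ufid, data->rte_flow);
 *     }
 *
 * netdev_offload_dpdk_destroy_flow() disassociates the ufid on success, so
 * callers never remove entries from the cmap directly.
 */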
127 /*
128  * To avoid individual xrealloc calls for each new element, a 'current_max'
129  * is used to keep track of the currently allocated number of elements. It
130  * starts at 8 and doubles on each xrealloc call.
131 */
132 struct flow_patterns {
133 struct rte_flow_item *items;
134 int cnt;
135 int current_max;
136 };
137
138 struct flow_actions {
139 struct rte_flow_action *actions;
140 int cnt;
141 int current_max;
142 };
143
144 static void
145 dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
146 {
147 ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
148 attr->ingress ? "ingress " : "",
149 attr->egress ? "egress " : "", attr->priority, attr->group,
150 attr->transfer ? "transfer " : "");
151 }
152
153 /* Adds one pattern item 'field' with the 'mask' to the dynamic string 's',
154  * using a 'testpmd command'-like format. */
155 #define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \
156 if (is_all_ones(&mask, sizeof mask)) { \
157 ds_put_format(s, field " is " fmt " ", spec_pri); \
158 } else if (!is_all_zeros(&mask, sizeof mask)) { \
159 ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \
160 spec_pri, mask_pri); \
161 }
162
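/*
 * Illustrative example (not part of the upstream file) of the strings that
 * DUMP_PATTERN_ITEM produces for an ethertype field, depending on the mask
 * (values made up for illustration):
 *
 *     all-ones mask:  "type is 0x0800 "
 *     partial mask:   "type spec 0x0800 type mask 0x0fff "
 *     all-zero mask:  (nothing is printed)
 */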
163 static void
164 dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
165 {
166 if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
167 const struct rte_flow_item_eth *eth_spec = item->spec;
168 const struct rte_flow_item_eth *eth_mask = item->mask;
169
170 ds_put_cstr(s, "eth ");
171 if (eth_spec) {
172 if (!eth_mask) {
173 eth_mask = &rte_flow_item_eth_mask;
174 }
175 DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT,
176 ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
177 ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes));
178 DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT,
179 ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
180 ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes));
181 DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
182 ntohs(eth_spec->type),
183 ntohs(eth_mask->type));
184 }
185 ds_put_cstr(s, "/ ");
186 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
187 const struct rte_flow_item_vlan *vlan_spec = item->spec;
188 const struct rte_flow_item_vlan *vlan_mask = item->mask;
189
190 ds_put_cstr(s, "vlan ");
191 if (vlan_spec) {
192 if (!vlan_mask) {
193 vlan_mask = &rte_flow_item_vlan_mask;
194 }
195 DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type", "0x%"PRIx16,
196 ntohs(vlan_spec->inner_type),
197 ntohs(vlan_mask->inner_type));
198 DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16,
199 ntohs(vlan_spec->tci), ntohs(vlan_mask->tci));
200 }
201 ds_put_cstr(s, "/ ");
202 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
203 const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
204 const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
205
206 ds_put_cstr(s, "ipv4 ");
207 if (ipv4_spec) {
208 if (!ipv4_mask) {
209 ipv4_mask = &rte_flow_item_ipv4_mask;
210 }
211 DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT,
212 IP_ARGS(ipv4_spec->hdr.src_addr),
213 IP_ARGS(ipv4_mask->hdr.src_addr));
214 DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT,
215 IP_ARGS(ipv4_spec->hdr.dst_addr),
216 IP_ARGS(ipv4_mask->hdr.dst_addr));
217 DUMP_PATTERN_ITEM(ipv4_mask->hdr.next_proto_id, "proto",
218 "0x%"PRIx8, ipv4_spec->hdr.next_proto_id,
219 ipv4_mask->hdr.next_proto_id);
220 DUMP_PATTERN_ITEM(ipv4_mask->hdr.type_of_service, "tos",
221 "0x%"PRIx8, ipv4_spec->hdr.type_of_service,
222 ipv4_mask->hdr.type_of_service);
223 DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, "ttl",
224 "0x%"PRIx8, ipv4_spec->hdr.time_to_live,
225 ipv4_mask->hdr.time_to_live);
226 }
227 ds_put_cstr(s, "/ ");
228 } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
229 const struct rte_flow_item_udp *udp_spec = item->spec;
230 const struct rte_flow_item_udp *udp_mask = item->mask;
231
232 ds_put_cstr(s, "udp ");
233 if (udp_spec) {
234 if (!udp_mask) {
235 udp_mask = &rte_flow_item_udp_mask;
236 }
237 DUMP_PATTERN_ITEM(udp_mask->hdr.src_port, "src", "%"PRIu16,
238 ntohs(udp_spec->hdr.src_port),
239 ntohs(udp_mask->hdr.src_port));
240 DUMP_PATTERN_ITEM(udp_mask->hdr.dst_port, "dst", "%"PRIu16,
241 ntohs(udp_spec->hdr.dst_port),
242 ntohs(udp_mask->hdr.dst_port));
243 }
244 ds_put_cstr(s, "/ ");
245 } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) {
246 const struct rte_flow_item_sctp *sctp_spec = item->spec;
247 const struct rte_flow_item_sctp *sctp_mask = item->mask;
248
249 ds_put_cstr(s, "sctp ");
250 if (sctp_spec) {
251 if (!sctp_mask) {
252 sctp_mask = &rte_flow_item_sctp_mask;
253 }
254 DUMP_PATTERN_ITEM(sctp_mask->hdr.src_port, "src", "%"PRIu16,
255 ntohs(sctp_spec->hdr.src_port),
256 ntohs(sctp_mask->hdr.src_port));
257 DUMP_PATTERN_ITEM(sctp_mask->hdr.dst_port, "dst", "%"PRIu16,
258 ntohs(sctp_spec->hdr.dst_port),
259 ntohs(sctp_mask->hdr.dst_port));
260 }
261 ds_put_cstr(s, "/ ");
262 } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) {
263 const struct rte_flow_item_icmp *icmp_spec = item->spec;
264 const struct rte_flow_item_icmp *icmp_mask = item->mask;
265
266 ds_put_cstr(s, "icmp ");
267 if (icmp_spec) {
268 if (!icmp_mask) {
269 icmp_mask = &rte_flow_item_icmp_mask;
270 }
271 DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_type, "icmp_type", "%"PRIu8,
272 icmp_spec->hdr.icmp_type,
273 icmp_mask->hdr.icmp_type);
274 DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_code, "icmp_code", "%"PRIu8,
275 icmp_spec->hdr.icmp_code,
276 icmp_mask->hdr.icmp_code);
277 }
278 ds_put_cstr(s, "/ ");
279 } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
280 const struct rte_flow_item_tcp *tcp_spec = item->spec;
281 const struct rte_flow_item_tcp *tcp_mask = item->mask;
282
283 ds_put_cstr(s, "tcp ");
284 if (tcp_spec) {
285 if (!tcp_mask) {
286 tcp_mask = &rte_flow_item_tcp_mask;
287 }
288 DUMP_PATTERN_ITEM(tcp_mask->hdr.src_port, "src", "%"PRIu16,
289 ntohs(tcp_spec->hdr.src_port),
290 ntohs(tcp_mask->hdr.src_port));
291 DUMP_PATTERN_ITEM(tcp_mask->hdr.dst_port, "dst", "%"PRIu16,
292 ntohs(tcp_spec->hdr.dst_port),
293 ntohs(tcp_mask->hdr.dst_port));
294 DUMP_PATTERN_ITEM(tcp_mask->hdr.tcp_flags, "flags", "0x%"PRIx8,
295 tcp_spec->hdr.tcp_flags,
296 tcp_mask->hdr.tcp_flags);
297 }
298 ds_put_cstr(s, "/ ");
299 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6) {
300 const struct rte_flow_item_ipv6 *ipv6_spec = item->spec;
301 const struct rte_flow_item_ipv6 *ipv6_mask = item->mask;
302
303 char addr_str[INET6_ADDRSTRLEN];
304 char mask_str[INET6_ADDRSTRLEN];
305 struct in6_addr addr, mask;
306
307 ds_put_cstr(s, "ipv6 ");
308 if (ipv6_spec) {
309 if (!ipv6_mask) {
310 ipv6_mask = &rte_flow_item_ipv6_mask;
311 }
312 memcpy(&addr, ipv6_spec->hdr.src_addr, sizeof addr);
313 memcpy(&mask, ipv6_mask->hdr.src_addr, sizeof mask);
314 ipv6_string_mapped(addr_str, &addr);
315 ipv6_string_mapped(mask_str, &mask);
316 DUMP_PATTERN_ITEM(mask, "src", "%s", addr_str, mask_str);
317
318 memcpy(&addr, ipv6_spec->hdr.dst_addr, sizeof addr);
319 memcpy(&mask, ipv6_mask->hdr.dst_addr, sizeof mask);
320 ipv6_string_mapped(addr_str, &addr);
321 ipv6_string_mapped(mask_str, &mask);
322 DUMP_PATTERN_ITEM(mask, "dst", "%s", addr_str, mask_str);
323
324 DUMP_PATTERN_ITEM(ipv6_mask->hdr.proto, "proto", "%"PRIu8,
325 ipv6_spec->hdr.proto, ipv6_mask->hdr.proto);
326 DUMP_PATTERN_ITEM(ipv6_mask->hdr.vtc_flow, "tc", "0x%"PRIx32,
327 ntohl(ipv6_spec->hdr.vtc_flow),
328 ntohl(ipv6_mask->hdr.vtc_flow));
329 DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, "hop", "%"PRIu8,
330 ipv6_spec->hdr.hop_limits,
331 ipv6_mask->hdr.hop_limits);
332 }
333 ds_put_cstr(s, "/ ");
334 } else {
335 ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
336 }
337 }
338
339 static void
340 dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
341 {
342 const struct rte_flow_item_eth *eth = NULL;
343 const struct rte_flow_item_ipv4 *ipv4 = NULL;
344 const struct rte_flow_item_ipv6 *ipv6 = NULL;
345 const struct rte_flow_item_udp *udp = NULL;
346 const struct rte_flow_item_vxlan *vxlan = NULL;
347
348 for (; items && items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
349 if (items->type == RTE_FLOW_ITEM_TYPE_ETH) {
350 eth = items->spec;
351 } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV4) {
352 ipv4 = items->spec;
353 } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV6) {
354 ipv6 = items->spec;
355 } else if (items->type == RTE_FLOW_ITEM_TYPE_UDP) {
356 udp = items->spec;
357 } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
358 vxlan = items->spec;
359 }
360 }
361
362 ds_put_format(s, "set vxlan ip-version %s ",
363 ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
364 if (vxlan) {
365 ds_put_format(s, "vni %"PRIu32" ",
366 ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
367 }
368 if (udp) {
369 ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
370 ntohs(udp->hdr.src_port), ntohs(udp->hdr.dst_port));
371 }
372 if (ipv4) {
373 ds_put_format(s, "ip-src "IP_FMT" ip-dst "IP_FMT" ",
374 IP_ARGS(ipv4->hdr.src_addr),
375 IP_ARGS(ipv4->hdr.dst_addr));
376 }
377 if (ipv6) {
378 struct in6_addr addr;
379
380 ds_put_cstr(s, "ip-src ");
381 memcpy(&addr, ipv6->hdr.src_addr, sizeof addr);
382 ipv6_format_mapped(&addr, s);
383 ds_put_cstr(s, " ip-dst ");
384 memcpy(&addr, ipv6->hdr.dst_addr, sizeof addr);
385 ipv6_format_mapped(&addr, s);
386 ds_put_cstr(s, " ");
387 }
388 if (eth) {
389 ds_put_format(s, "eth-src "ETH_ADDR_FMT" eth-dst "ETH_ADDR_FMT,
390 ETH_ADDR_BYTES_ARGS(eth->src.addr_bytes),
391 ETH_ADDR_BYTES_ARGS(eth->dst.addr_bytes));
392 }
393 }
394
395 static void
396 dump_flow_action(struct ds *s, struct ds *s_extra,
397 const struct rte_flow_action *actions)
398 {
399 if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
400 const struct rte_flow_action_mark *mark = actions->conf;
401
402 ds_put_cstr(s, "mark ");
403 if (mark) {
404 ds_put_format(s, "id %d ", mark->id);
405 }
406 ds_put_cstr(s, "/ ");
407 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
408 ds_put_cstr(s, "rss / ");
409 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
410 ds_put_cstr(s, "count / ");
411 } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
412 const struct rte_flow_action_port_id *port_id = actions->conf;
413
414 ds_put_cstr(s, "port_id ");
415 if (port_id) {
416 ds_put_format(s, "original %d id %d ",
417 port_id->original, port_id->id);
418 }
419 ds_put_cstr(s, "/ ");
420 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
421 ds_put_cstr(s, "drop / ");
422 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ||
423 actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) {
424 const struct rte_flow_action_set_mac *set_mac = actions->conf;
425
426 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST
427 ? "dst" : "src";
428
429 ds_put_format(s, "set_mac_%s ", dirstr);
430 if (set_mac) {
431 ds_put_format(s, "mac_addr "ETH_ADDR_FMT" ",
432 ETH_ADDR_BYTES_ARGS(set_mac->mac_addr));
433 }
434 ds_put_cstr(s, "/ ");
435 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ||
436 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) {
437 const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf;
438 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
439 ? "dst" : "src";
440
441 ds_put_format(s, "set_ipv4_%s ", dirstr);
442 if (set_ipv4) {
443 ds_put_format(s, "ipv4_addr "IP_FMT" ",
444 IP_ARGS(set_ipv4->ipv4_addr));
445 }
446 ds_put_cstr(s, "/ ");
447 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) {
448 const struct rte_flow_action_set_ttl *set_ttl = actions->conf;
449
450 ds_put_cstr(s, "set_ttl ");
451 if (set_ttl) {
452 ds_put_format(s, "ttl_value %d ", set_ttl->ttl_value);
453 }
454 ds_put_cstr(s, "/ ");
455 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ||
456 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) {
457 const struct rte_flow_action_set_tp *set_tp = actions->conf;
458 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST
459 ? "dst" : "src";
460
461 ds_put_format(s, "set_tp_%s ", dirstr);
462 if (set_tp) {
463 ds_put_format(s, "port %"PRIu16" ", ntohs(set_tp->port));
464 }
465 ds_put_cstr(s, "/ ");
466 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) {
467 const struct rte_flow_action_of_push_vlan *of_push_vlan =
468 actions->conf;
469
470 ds_put_cstr(s, "of_push_vlan ");
471 if (of_push_vlan) {
472 ds_put_format(s, "ethertype 0x%"PRIx16" ",
473 ntohs(of_push_vlan->ethertype));
474 }
475 ds_put_cstr(s, "/ ");
476 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
477 const struct rte_flow_action_of_set_vlan_pcp *of_set_vlan_pcp =
478 actions->conf;
479
480 ds_put_cstr(s, "of_set_vlan_pcp ");
481 if (of_set_vlan_pcp) {
482 ds_put_format(s, "vlan_pcp %"PRIu8" ", of_set_vlan_pcp->vlan_pcp);
483 }
484 ds_put_cstr(s, "/ ");
485 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
486 const struct rte_flow_action_of_set_vlan_vid *of_set_vlan_vid =
487 actions->conf;
488
489 ds_put_cstr(s, "of_set_vlan_vid ");
490 if (of_set_vlan_vid) {
491 ds_put_format(s, "vlan_vid %"PRIu16" ",
492 ntohs(of_set_vlan_vid->vlan_vid));
493 }
494 ds_put_cstr(s, "/ ");
495 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) {
496 ds_put_cstr(s, "of_pop_vlan / ");
497 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ||
498 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) {
499 const struct rte_flow_action_set_ipv6 *set_ipv6 = actions->conf;
500
501 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
502 ? "dst" : "src";
503
504 ds_put_format(s, "set_ipv6_%s ", dirstr);
505 if (set_ipv6) {
506 ds_put_cstr(s, "ipv6_addr ");
507 ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s);
508 ds_put_cstr(s, " ");
509 }
510 ds_put_cstr(s, "/ ");
511 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
512 const struct rte_flow_action_raw_encap *raw_encap = actions->conf;
513
514 ds_put_cstr(s, "raw_encap index 0 / ");
515 if (raw_encap) {
516 ds_put_format(s_extra, "Raw-encap size=%ld set raw_encap 0 raw "
517 "pattern is ", raw_encap->size);
518 for (int i = 0; i < raw_encap->size; i++) {
519 ds_put_format(s_extra, "%02x", raw_encap->data[i]);
520 }
521 ds_put_cstr(s_extra, " / end_set;");
522 }
523 } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
524 const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
525 const struct rte_flow_item *items = vxlan_encap->definition;
526
527 ds_put_cstr(s, "vxlan_encap / ");
528 dump_vxlan_encap(s_extra, items);
529 ds_put_cstr(s_extra, ";");
530 } else {
531 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
532 }
533 }
534
535 static struct ds *
536 dump_flow(struct ds *s, struct ds *s_extra,
537 const struct rte_flow_attr *attr,
538 const struct rte_flow_item *items,
539 const struct rte_flow_action *actions)
540 {
541 if (attr) {
542 dump_flow_attr(s, attr);
543 }
544 ds_put_cstr(s, "pattern ");
545 while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
546 dump_flow_pattern(s, items++);
547 }
548 ds_put_cstr(s, "end actions ");
549 while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
550 dump_flow_action(s, s_extra, actions++);
551 }
552 ds_put_cstr(s, "end");
553 return s;
554 }
555
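/*
 * Illustrative example (not part of the upstream file): for a simple
 * IPv4/TCP flow fully offloaded to another port, the string built by
 * dump_flow() looks roughly like (values made up, line-wrapped here):
 *
 *     ingress priority 0 group 0 transfer pattern eth / ipv4
 *     src is 10.0.0.1 dst is 10.0.0.2 proto is 0x6 / tcp dst is 80 / end
 *     actions count / port_id original 0 id 1 / end
 *
 * The exact fields depend on which spec/mask values are set.
 */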
556 static struct rte_flow *
557 netdev_offload_dpdk_flow_create(struct netdev *netdev,
558 const struct rte_flow_attr *attr,
559 const struct rte_flow_item *items,
560 const struct rte_flow_action *actions,
561 struct rte_flow_error *error)
562 {
563 struct ds s_extra = DS_EMPTY_INITIALIZER;
564 struct ds s = DS_EMPTY_INITIALIZER;
565 struct rte_flow *flow;
566 char *extra_str;
567
568 flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
569 if (flow) {
570 if (!VLOG_DROP_DBG(&rl)) {
571 dump_flow(&s, &s_extra, attr, items, actions);
572 extra_str = ds_cstr(&s_extra);
573 VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
574 netdev_get_name(netdev), (intptr_t) flow, extra_str,
575 netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
576 }
577 } else {
578 enum vlog_level level = VLL_WARN;
579
580 if (error->type == RTE_FLOW_ERROR_TYPE_ACTION) {
581 level = VLL_DBG;
582 }
583 VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
584 netdev_get_name(netdev), error->type, error->message);
585 if (!vlog_should_drop(&this_module, level, &rl)) {
586 dump_flow(&s, &s_extra, attr, items, actions);
587 extra_str = ds_cstr(&s_extra);
588 VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
589 netdev_get_name(netdev), extra_str,
590 netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
591 }
592 }
593 ds_destroy(&s);
594 ds_destroy(&s_extra);
595 return flow;
596 }
597
598 static void
599 add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
600 const void *spec, const void *mask)
601 {
602 int cnt = patterns->cnt;
603
604 if (cnt == 0) {
605 patterns->current_max = 8;
606 patterns->items = xcalloc(patterns->current_max,
607 sizeof *patterns->items);
608 } else if (cnt == patterns->current_max) {
609 patterns->current_max *= 2;
610 patterns->items = xrealloc(patterns->items, patterns->current_max *
611 sizeof *patterns->items);
612 }
613
614 patterns->items[cnt].type = type;
615 patterns->items[cnt].spec = spec;
616 patterns->items[cnt].mask = mask;
617 patterns->items[cnt].last = NULL;
618 patterns->cnt++;
619 }
620
621 static void
622 add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
623 const void *conf)
624 {
625 int cnt = actions->cnt;
626
627 if (cnt == 0) {
628 actions->current_max = 8;
629 actions->actions = xcalloc(actions->current_max,
630 sizeof *actions->actions);
631 } else if (cnt == actions->current_max) {
632 actions->current_max *= 2;
633 actions->actions = xrealloc(actions->actions, actions->current_max *
634 sizeof *actions->actions);
635 }
636
637 actions->actions[cnt].type = type;
638 actions->actions[cnt].conf = conf;
639 actions->cnt++;
640 }
641
642 static void
643 free_flow_patterns(struct flow_patterns *patterns)
644 {
645 int i;
646
647 for (i = 0; i < patterns->cnt; i++) {
648 if (patterns->items[i].spec) {
649 free(CONST_CAST(void *, patterns->items[i].spec));
650 }
651 if (patterns->items[i].mask) {
652 free(CONST_CAST(void *, patterns->items[i].mask));
653 }
654 }
655 free(patterns->items);
656 patterns->items = NULL;
657 patterns->cnt = 0;
658 }
659
660 static void
661 free_flow_actions(struct flow_actions *actions)
662 {
663 int i;
664
665 for (i = 0; i < actions->cnt; i++) {
666 if (actions->actions[i].conf) {
667 free(CONST_CAST(void *, actions->actions[i].conf));
668 }
669 }
670 free(actions->actions);
671 actions->actions = NULL;
672 actions->cnt = 0;
673 }
674
675 static int
676 parse_flow_match(struct flow_patterns *patterns,
677 struct match *match)
678 {
679 uint8_t *next_proto_mask = NULL;
680 struct flow *consumed_masks;
681 uint8_t proto = 0;
682
683 consumed_masks = &match->wc.masks;
684
685 memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port);
686 /* recirc id must be zero. */
687 if (match->wc.masks.recirc_id & match->flow.recirc_id) {
688 return -1;
689 }
690 consumed_masks->recirc_id = 0;
691 consumed_masks->packet_type = 0;
692
693 /* Eth */
694 if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow)
695 && eth_addr_is_zero(match->wc.masks.dl_dst)
696 && eth_addr_is_zero(match->wc.masks.dl_src)) {
697 /*
698          * This is a temporary workaround to fix the ethernet pattern for partial
699          * hardware offload on X710 devices. This fix will be reverted once
700 * the issue is fixed within the i40e PMD driver.
701 */
702 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
703
704 memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
705 memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
706 consumed_masks->dl_type = 0;
707 } else if (match->wc.masks.dl_type ||
708 !eth_addr_is_zero(match->wc.masks.dl_src) ||
709 !eth_addr_is_zero(match->wc.masks.dl_dst)) {
710 struct rte_flow_item_eth *spec, *mask;
711
712 spec = xzalloc(sizeof *spec);
713 mask = xzalloc(sizeof *mask);
714
715 memcpy(&spec->dst, &match->flow.dl_dst, sizeof spec->dst);
716 memcpy(&spec->src, &match->flow.dl_src, sizeof spec->src);
717 spec->type = match->flow.dl_type;
718
719 memcpy(&mask->dst, &match->wc.masks.dl_dst, sizeof mask->dst);
720 memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src);
721 mask->type = match->wc.masks.dl_type;
722
723 memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
724 memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
725 consumed_masks->dl_type = 0;
726
727 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
728 }
729
730 /* VLAN */
731 if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
732 struct rte_flow_item_vlan *spec, *mask;
733
734 spec = xzalloc(sizeof *spec);
735 mask = xzalloc(sizeof *mask);
736
737 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
738 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
739
740         /* Match any protocol. */
741 mask->inner_type = 0;
742
743 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
744 }
745     /* For untagged matching, match->wc.masks.vlans[0].tci is 0xFFFF and
746      * match->flow.vlans[0].tci is 0. The mask must be consumed outside of the
747      * 'if' scope to handle that case.
748 */
749 memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]);
750
751 /* IP v4 */
752 if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
753 struct rte_flow_item_ipv4 *spec, *mask;
754
755 spec = xzalloc(sizeof *spec);
756 mask = xzalloc(sizeof *mask);
757
758 spec->hdr.type_of_service = match->flow.nw_tos;
759 spec->hdr.time_to_live = match->flow.nw_ttl;
760 spec->hdr.next_proto_id = match->flow.nw_proto;
761 spec->hdr.src_addr = match->flow.nw_src;
762 spec->hdr.dst_addr = match->flow.nw_dst;
763
764 mask->hdr.type_of_service = match->wc.masks.nw_tos;
765 mask->hdr.time_to_live = match->wc.masks.nw_ttl;
766 mask->hdr.next_proto_id = match->wc.masks.nw_proto;
767 mask->hdr.src_addr = match->wc.masks.nw_src;
768 mask->hdr.dst_addr = match->wc.masks.nw_dst;
769
770 consumed_masks->nw_tos = 0;
771 consumed_masks->nw_ttl = 0;
772 consumed_masks->nw_proto = 0;
773 consumed_masks->nw_src = 0;
774 consumed_masks->nw_dst = 0;
775
776 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask);
777
778 /* Save proto for L4 protocol setup. */
779 proto = spec->hdr.next_proto_id &
780 mask->hdr.next_proto_id;
781 next_proto_mask = &mask->hdr.next_proto_id;
782 }
783 /* If fragmented, then don't HW accelerate - for now. */
784 if (match->wc.masks.nw_frag & match->flow.nw_frag) {
785 return -1;
786 }
787 consumed_masks->nw_frag = 0;
788
789 /* IP v6 */
790 if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {
791 struct rte_flow_item_ipv6 *spec, *mask;
792
793 spec = xzalloc(sizeof *spec);
794 mask = xzalloc(sizeof *mask);
795
796 spec->hdr.proto = match->flow.nw_proto;
797 spec->hdr.hop_limits = match->flow.nw_ttl;
798 spec->hdr.vtc_flow =
799 htonl((uint32_t) match->flow.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
800 memcpy(spec->hdr.src_addr, &match->flow.ipv6_src,
801 sizeof spec->hdr.src_addr);
802 memcpy(spec->hdr.dst_addr, &match->flow.ipv6_dst,
803 sizeof spec->hdr.dst_addr);
804
805 mask->hdr.proto = match->wc.masks.nw_proto;
806 mask->hdr.hop_limits = match->wc.masks.nw_ttl;
807 mask->hdr.vtc_flow =
808 htonl((uint32_t) match->wc.masks.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
809 memcpy(mask->hdr.src_addr, &match->wc.masks.ipv6_src,
810 sizeof mask->hdr.src_addr);
811 memcpy(mask->hdr.dst_addr, &match->wc.masks.ipv6_dst,
812 sizeof mask->hdr.dst_addr);
813
814 consumed_masks->nw_proto = 0;
815 consumed_masks->nw_ttl = 0;
816 consumed_masks->nw_tos = 0;
817 memset(&consumed_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src);
818 memset(&consumed_masks->ipv6_dst, 0, sizeof consumed_masks->ipv6_dst);
819
820 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV6, spec, mask);
821
822 /* Save proto for L4 protocol setup. */
823 proto = spec->hdr.proto & mask->hdr.proto;
824 next_proto_mask = &mask->hdr.proto;
825 }
826
827 if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP &&
828 proto != IPPROTO_SCTP && proto != IPPROTO_TCP &&
829 (match->wc.masks.tp_src ||
830 match->wc.masks.tp_dst ||
831 match->wc.masks.tcp_flags)) {
832 VLOG_DBG("L4 Protocol (%u) not supported", proto);
833 return -1;
834 }
835
836 if (proto == IPPROTO_TCP) {
837 struct rte_flow_item_tcp *spec, *mask;
838
839 spec = xzalloc(sizeof *spec);
840 mask = xzalloc(sizeof *mask);
841
842 spec->hdr.src_port = match->flow.tp_src;
843 spec->hdr.dst_port = match->flow.tp_dst;
844 spec->hdr.data_off = ntohs(match->flow.tcp_flags) >> 8;
845 spec->hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff;
846
847 mask->hdr.src_port = match->wc.masks.tp_src;
848 mask->hdr.dst_port = match->wc.masks.tp_dst;
849 mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8;
850 mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;
851
852 consumed_masks->tp_src = 0;
853 consumed_masks->tp_dst = 0;
854 consumed_masks->tcp_flags = 0;
855
856 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask);
857
858 /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */
859 if (next_proto_mask) {
860 *next_proto_mask = 0;
861 }
862 } else if (proto == IPPROTO_UDP) {
863 struct rte_flow_item_udp *spec, *mask;
864
865 spec = xzalloc(sizeof *spec);
866 mask = xzalloc(sizeof *mask);
867
868 spec->hdr.src_port = match->flow.tp_src;
869 spec->hdr.dst_port = match->flow.tp_dst;
870
871 mask->hdr.src_port = match->wc.masks.tp_src;
872 mask->hdr.dst_port = match->wc.masks.tp_dst;
873
874 consumed_masks->tp_src = 0;
875 consumed_masks->tp_dst = 0;
876
877 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask);
878
879 /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */
880 if (next_proto_mask) {
881 *next_proto_mask = 0;
882 }
883 } else if (proto == IPPROTO_SCTP) {
884 struct rte_flow_item_sctp *spec, *mask;
885
886 spec = xzalloc(sizeof *spec);
887 mask = xzalloc(sizeof *mask);
888
889 spec->hdr.src_port = match->flow.tp_src;
890 spec->hdr.dst_port = match->flow.tp_dst;
891
892 mask->hdr.src_port = match->wc.masks.tp_src;
893 mask->hdr.dst_port = match->wc.masks.tp_dst;
894
895 consumed_masks->tp_src = 0;
896 consumed_masks->tp_dst = 0;
897
898 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask);
899
900 /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */
901 if (next_proto_mask) {
902 *next_proto_mask = 0;
903 }
904 } else if (proto == IPPROTO_ICMP) {
905 struct rte_flow_item_icmp *spec, *mask;
906
907 spec = xzalloc(sizeof *spec);
908 mask = xzalloc(sizeof *mask);
909
910 spec->hdr.icmp_type = (uint8_t) ntohs(match->flow.tp_src);
911 spec->hdr.icmp_code = (uint8_t) ntohs(match->flow.tp_dst);
912
913 mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src);
914 mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst);
915
916 consumed_masks->tp_src = 0;
917 consumed_masks->tp_dst = 0;
918
919 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask);
920
921 /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */
922 if (next_proto_mask) {
923 *next_proto_mask = 0;
924 }
925 }
926
927 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL);
928
929 if (!is_all_zeros(consumed_masks, sizeof *consumed_masks)) {
930 return -1;
931 }
932 return 0;
933 }
934
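/*
 * Illustrative example (not part of the upstream file): for a megaflow that
 * matches on dl_type=0x0800, nw_proto=6 and tp_dst=80 with the MAC addresses
 * wildcarded, the pattern list built above is:
 *
 *     RTE_FLOW_ITEM_TYPE_ETH   (NULL spec/mask, the X710 workaround above)
 *     RTE_FLOW_ITEM_TYPE_IPV4  (next_proto_id = 6 in spec and mask)
 *     RTE_FLOW_ITEM_TYPE_TCP   (dst_port = 80 in spec and mask)
 *     RTE_FLOW_ITEM_TYPE_END
 *
 * Any mask bit left unconsumed at the end makes parse_flow_match() return -1,
 * so the flow is not offloaded.
 */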
935 static void
936 add_flow_mark_rss_actions(struct flow_actions *actions,
937 uint32_t flow_mark,
938 const struct netdev *netdev)
939 {
940 struct rte_flow_action_mark *mark;
941 struct action_rss_data {
942 struct rte_flow_action_rss conf;
943 uint16_t queue[0];
944 } *rss_data;
945 BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0);
946 int i;
947
948 mark = xzalloc(sizeof *mark);
949
950 mark->id = flow_mark;
951 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark);
952
953 rss_data = xmalloc(sizeof *rss_data +
954 netdev_n_rxq(netdev) * sizeof rss_data->queue[0]);
955 *rss_data = (struct action_rss_data) {
956 .conf = (struct rte_flow_action_rss) {
957 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
958 .level = 0,
959 .types = 0,
960 .queue_num = netdev_n_rxq(netdev),
961 .queue = rss_data->queue,
962 .key_len = 0,
963 .key = NULL
964 },
965 };
966
967 /* Override queue array with default. */
968 for (i = 0; i < netdev_n_rxq(netdev); i++) {
969 rss_data->queue[i] = i;
970 }
971
972 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
973 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
974 }
975
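/*
 * Illustrative note (not part of the upstream file): for partial offload the
 * action list built above is MARK / RSS / END, e.g. for a netdev with two rx
 * queues:
 *
 *     MARK  id = flow_mark
 *     RSS   queue_num = 2, queue = {0, 1} (default hash, all queues)
 *     END
 *
 * The MARK id is what lets the software datapath map received packets back to
 * the installed megaflow without a full lookup.
 */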
976 static struct rte_flow *
977 netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
978 struct netdev *netdev,
979 uint32_t flow_mark)
980 {
981 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
982 const struct rte_flow_attr flow_attr = {
983 .group = 0,
984 .priority = 0,
985 .ingress = 1,
986 .egress = 0
987 };
988 struct rte_flow_error error;
989 struct rte_flow *flow;
990
991 add_flow_mark_rss_actions(&actions, flow_mark, netdev);
992
993 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
994 actions.actions, &error);
995
996 free_flow_actions(&actions);
997 return flow;
998 }
999
1000 static void
1001 add_count_action(struct flow_actions *actions)
1002 {
1003 struct rte_flow_action_count *count = xzalloc(sizeof *count);
1004
1005 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
1006 }
1007
1008 static int
1009 add_port_id_action(struct flow_actions *actions,
1010 struct netdev *outdev)
1011 {
1012 struct rte_flow_action_port_id *port_id;
1013 int outdev_id;
1014
1015 outdev_id = netdev_dpdk_get_port_id(outdev);
1016 if (outdev_id < 0) {
1017 return -1;
1018 }
1019 port_id = xzalloc(sizeof *port_id);
1020 port_id->id = outdev_id;
1021 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
1022 return 0;
1023 }
1024
1025 static int
1026 add_output_action(struct netdev *netdev,
1027 struct flow_actions *actions,
1028 const struct nlattr *nla)
1029 {
1030 struct netdev *outdev;
1031 odp_port_t port;
1032 int ret = 0;
1033
1034 port = nl_attr_get_odp_port(nla);
1035 outdev = netdev_ports_get(port, netdev->dpif_type);
1036 if (outdev == NULL) {
1037 VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port);
1038 return -1;
1039 }
1040 if (!netdev_flow_api_equals(netdev, outdev) ||
1041 add_port_id_action(actions, outdev)) {
1042 VLOG_DBG_RL(&rl, "%s: Output to port \'%s\' cannot be offloaded.",
1043 netdev_get_name(netdev), netdev_get_name(outdev));
1044 ret = -1;
1045 }
1046 netdev_close(outdev);
1047 return ret;
1048 }
1049
1050 static int
1051 add_set_flow_action__(struct flow_actions *actions,
1052 const void *value, void *mask,
1053 const size_t size, const int attr)
1054 {
1055 void *spec;
1056
1057 if (mask) {
1058         /* DPDK does not support partially masked set actions. In such a
1059          * case, fail the offload.
1060 */
1061 if (is_all_zeros(mask, size)) {
1062 return 0;
1063 }
1064 if (!is_all_ones(mask, size)) {
1065 VLOG_DBG_RL(&rl, "Partial mask is not supported");
1066 return -1;
1067 }
1068 }
1069
1070 spec = xzalloc(size);
1071 memcpy(spec, value, size);
1072 add_flow_action(actions, attr, spec);
1073
1074 /* Clear used mask for later checking. */
1075 if (mask) {
1076 memset(mask, 0, size);
1077 }
1078 return 0;
1079 }
1080
1081 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
1082 MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src));
1083 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
1084 MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst));
1085 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
1086 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src));
1087 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
1088 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
1089 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
1090 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
1091 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
1092 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_src));
1093 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
1094 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_dst));
1095 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
1096 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_hlimit));
1097 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1098 MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src));
1099 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1100 MEMBER_SIZEOF(struct ovs_key_tcp, tcp_dst));
1101 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1102 MEMBER_SIZEOF(struct ovs_key_udp, udp_src));
1103 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1104 MEMBER_SIZEOF(struct ovs_key_udp, udp_dst));
1105
1106 static int
1107 parse_set_actions(struct flow_actions *actions,
1108 const struct nlattr *set_actions,
1109 const size_t set_actions_len,
1110 bool masked)
1111 {
1112 const struct nlattr *sa;
1113 unsigned int sleft;
1114
1115 #define add_set_flow_action(field, type) \
1116 if (add_set_flow_action__(actions, &key->field, \
1117 mask ? CONST_CAST(void *, &mask->field) : NULL, \
1118 sizeof key->field, type)) { \
1119 return -1; \
1120 }
1121
1122 NL_ATTR_FOR_EACH_UNSAFE (sa, sleft, set_actions, set_actions_len) {
1123 if (nl_attr_type(sa) == OVS_KEY_ATTR_ETHERNET) {
1124 const struct ovs_key_ethernet *key = nl_attr_get(sa);
1125 const struct ovs_key_ethernet *mask = masked ? key + 1 : NULL;
1126
1127 add_set_flow_action(eth_src, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC);
1128 add_set_flow_action(eth_dst, RTE_FLOW_ACTION_TYPE_SET_MAC_DST);
1129
1130 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1131 VLOG_DBG_RL(&rl, "Unsupported ETHERNET set action");
1132 return -1;
1133 }
1134 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV4) {
1135 const struct ovs_key_ipv4 *key = nl_attr_get(sa);
1136 const struct ovs_key_ipv4 *mask = masked ? key + 1 : NULL;
1137
1138 add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC);
1139 add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST);
1140 add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL);
1141
1142 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1143 VLOG_DBG_RL(&rl, "Unsupported IPv4 set action");
1144 return -1;
1145 }
1146 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV6) {
1147 const struct ovs_key_ipv6 *key = nl_attr_get(sa);
1148 const struct ovs_key_ipv6 *mask = masked ? key + 1 : NULL;
1149
1150 add_set_flow_action(ipv6_src, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC);
1151 add_set_flow_action(ipv6_dst, RTE_FLOW_ACTION_TYPE_SET_IPV6_DST);
1152 add_set_flow_action(ipv6_hlimit, RTE_FLOW_ACTION_TYPE_SET_TTL);
1153
1154 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1155 VLOG_DBG_RL(&rl, "Unsupported IPv6 set action");
1156 return -1;
1157 }
1158 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) {
1159 const struct ovs_key_tcp *key = nl_attr_get(sa);
1160 const struct ovs_key_tcp *mask = masked ? key + 1 : NULL;
1161
1162 add_set_flow_action(tcp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
1163 add_set_flow_action(tcp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
1164
1165 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1166 VLOG_DBG_RL(&rl, "Unsupported TCP set action");
1167 return -1;
1168 }
1169 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_UDP) {
1170 const struct ovs_key_udp *key = nl_attr_get(sa);
1171 const struct ovs_key_udp *mask = masked ? key + 1 : NULL;
1172
1173 add_set_flow_action(udp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
1174 add_set_flow_action(udp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
1175
1176 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1177 VLOG_DBG_RL(&rl, "Unsupported UDP set action");
1178 return -1;
1179 }
1180 } else {
1181 VLOG_DBG_RL(&rl,
1182 "Unsupported set action type %d", nl_attr_type(sa));
1183 return -1;
1184 }
1185 }
1186
1187 return 0;
1188 }
1189
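/*
 * Illustrative example (not part of the upstream file): a masked datapath
 * action such as set(ipv4(src=10.0.0.1/255.255.255.255)) is translated by the
 * code above into a single RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC action.  A
 * partial mask (e.g. src=10.0.0.1/255.255.255.0) makes
 * add_set_flow_action__() return -1, so the whole offload is rejected.
 */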
1190 /* Maximum number of items in struct rte_flow_action_vxlan_encap.
1191 * ETH / IPv4(6) / UDP / VXLAN / END
1192 */
1193 #define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
1194
1195 static int
1196 add_vxlan_encap_action(struct flow_actions *actions,
1197 const void *header)
1198 {
1199 const struct eth_header *eth;
1200 const struct udp_header *udp;
1201 struct vxlan_data {
1202 struct rte_flow_action_vxlan_encap conf;
1203 struct rte_flow_item items[ACTION_VXLAN_ENCAP_ITEMS_NUM];
1204 } *vxlan_data;
1205 BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
1206 const void *vxlan;
1207 const void *l3;
1208 const void *l4;
1209 int field;
1210
1211 vxlan_data = xzalloc(sizeof *vxlan_data);
1212 field = 0;
1213
1214 eth = header;
1215 /* Ethernet */
1216 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
1217 vxlan_data->items[field].spec = eth;
1218 vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
1219 field++;
1220
1221 l3 = eth + 1;
1222 /* IP */
1223 if (eth->eth_type == htons(ETH_TYPE_IP)) {
1224 /* IPv4 */
1225 const struct ip_header *ip = l3;
1226
1227 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
1228 vxlan_data->items[field].spec = ip;
1229 vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;
1230
1231 if (ip->ip_proto != IPPROTO_UDP) {
1232 goto err;
1233 }
1234 l4 = (ip + 1);
1235 } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
1236 const struct ovs_16aligned_ip6_hdr *ip6 = l3;
1237
1238 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
1239 vxlan_data->items[field].spec = ip6;
1240 vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;
1241
1242 if (ip6->ip6_nxt != IPPROTO_UDP) {
1243 goto err;
1244 }
1245 l4 = (ip6 + 1);
1246 } else {
1247 goto err;
1248 }
1249 field++;
1250
1251 udp = l4;
1252 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
1253 vxlan_data->items[field].spec = udp;
1254 vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
1255 field++;
1256
1257 vxlan = (udp + 1);
1258 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
1259 vxlan_data->items[field].spec = vxlan;
1260 vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
1261 field++;
1262
1263 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;
1264
1265 vxlan_data->conf.definition = vxlan_data->items;
1266
1267 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);
1268
1269 return 0;
1270 err:
1271 free(vxlan_data);
1272 return -1;
1273 }
1274
1275 static int
1276 parse_vlan_push_action(struct flow_actions *actions,
1277 const struct ovs_action_push_vlan *vlan_push)
1278 {
1279 struct rte_flow_action_of_push_vlan *rte_push_vlan;
1280 struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp;
1281 struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid;
1282
1283 rte_push_vlan = xzalloc(sizeof *rte_push_vlan);
1284 rte_push_vlan->ethertype = vlan_push->vlan_tpid;
1285 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, rte_push_vlan);
1286
1287 rte_vlan_pcp = xzalloc(sizeof *rte_vlan_pcp);
1288 rte_vlan_pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci);
1289 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
1290 rte_vlan_pcp);
1291
1292 rte_vlan_vid = xzalloc(sizeof *rte_vlan_vid);
1293 rte_vlan_vid->vlan_vid = htons(vlan_tci_to_vid(vlan_push->vlan_tci));
1294 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
1295 rte_vlan_vid);
1296 return 0;
1297 }
1298
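/*
 * Illustrative example (not part of the upstream file): an OVS
 * push_vlan(tpid=0x8100,tci=0xb064) action (PCP 5, VID 100, CFI set) is
 * expanded by the function above into three rte_flow actions:
 *
 *     OF_PUSH_VLAN      ethertype 0x8100
 *     OF_SET_VLAN_PCP   vlan_pcp 5
 *     OF_SET_VLAN_VID   vlan_vid 100
 */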
1299 static int
1300 parse_clone_actions(struct netdev *netdev,
1301 struct flow_actions *actions,
1302 const struct nlattr *clone_actions,
1303 const size_t clone_actions_len)
1304 {
1305 const struct nlattr *ca;
1306 unsigned int cleft;
1307
1308 NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
1309 int clone_type = nl_attr_type(ca);
1310
1311 if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
1312 const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
1313 struct rte_flow_action_raw_encap *raw_encap;
1314
1315 if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
1316 !add_vxlan_encap_action(actions, tnl_push->header)) {
1317 continue;
1318 }
1319
1320 raw_encap = xzalloc(sizeof *raw_encap);
1321 raw_encap->data = (uint8_t *) tnl_push->header;
1322 raw_encap->preserve = NULL;
1323 raw_encap->size = tnl_push->header_len;
1324
1325 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
1326 raw_encap);
1327 } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
1328 if (add_output_action(netdev, actions, ca)) {
1329 return -1;
1330 }
1331 } else {
1332 VLOG_DBG_RL(&rl,
1333 "Unsupported nested action inside clone(), "
1334 "action type: %d", clone_type);
1335 return -1;
1336 }
1337 }
1338 return 0;
1339 }
1340
1341 static int
1342 parse_flow_actions(struct netdev *netdev,
1343 struct flow_actions *actions,
1344 struct nlattr *nl_actions,
1345 size_t nl_actions_len)
1346 {
1347 struct nlattr *nla;
1348 size_t left;
1349
1350 add_count_action(actions);
1351 NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
1352 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
1353 if (add_output_action(netdev, actions, nla)) {
1354 return -1;
1355 }
1356 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
1357 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_DROP, NULL);
1358 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET ||
1359 nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
1360 const struct nlattr *set_actions = nl_attr_get(nla);
1361 const size_t set_actions_len = nl_attr_get_size(nla);
1362 bool masked = nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED;
1363
1364 if (parse_set_actions(actions, set_actions, set_actions_len,
1365 masked)) {
1366 return -1;
1367 }
1368 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) {
1369 const struct ovs_action_push_vlan *vlan = nl_attr_get(nla);
1370
1371 if (parse_vlan_push_action(actions, vlan)) {
1372 return -1;
1373 }
1374 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) {
1375 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL);
1376 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE &&
1377 left <= NLA_ALIGN(nla->nla_len)) {
1378 const struct nlattr *clone_actions = nl_attr_get(nla);
1379 size_t clone_actions_len = nl_attr_get_size(nla);
1380
1381 if (parse_clone_actions(netdev, actions, clone_actions,
1382 clone_actions_len)) {
1383 return -1;
1384 }
1385 } else {
1386 VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
1387 return -1;
1388 }
1389 }
1390
1391 if (nl_actions_len == 0) {
1392 VLOG_DBG_RL(&rl, "No actions provided");
1393 return -1;
1394 }
1395
1396 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
1397 return 0;
1398 }
1399
1400 static struct rte_flow *
1401 netdev_offload_dpdk_actions(struct netdev *netdev,
1402 struct flow_patterns *patterns,
1403 struct nlattr *nl_actions,
1404 size_t actions_len)
1405 {
1406 const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
1407 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
1408 struct rte_flow *flow = NULL;
1409 struct rte_flow_error error;
1410 int ret;
1411
1412 ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
1413 if (ret) {
1414 goto out;
1415 }
1416 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
1417 actions.actions, &error);
1418 out:
1419 free_flow_actions(&actions);
1420 return flow;
1421 }
1422
1423 static int
1424 netdev_offload_dpdk_add_flow(struct netdev *netdev,
1425 struct match *match,
1426 struct nlattr *nl_actions,
1427 size_t actions_len,
1428 const ovs_u128 *ufid,
1429 struct offload_info *info)
1430 {
1431 struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
1432 bool actions_offloaded = true;
1433 struct rte_flow *flow;
1434 int ret = 0;
1435
1436 ret = parse_flow_match(&patterns, match);
1437 if (ret) {
1438 VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported",
1439 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid));
1440 goto out;
1441 }
1442
1443 flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
1444 actions_len);
1445 if (!flow) {
1446         /* If we failed to offload the rule actions, fall back to MARK+RSS
1447 * actions.
1448 */
1449 flow = netdev_offload_dpdk_mark_rss(&patterns, netdev,
1450 info->flow_mark);
1451 actions_offloaded = false;
1452 }
1453
1454 if (!flow) {
1455 ret = -1;
1456 goto out;
1457 }
1458 ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
1459 VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT,
1460 netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid));
1461
1462 out:
1463 free_flow_patterns(&patterns);
1464 return ret;
1465 }
1466
1467 static int
1468 netdev_offload_dpdk_destroy_flow(struct netdev *netdev,
1469 const ovs_u128 *ufid,
1470 struct rte_flow *rte_flow)
1471 {
1472 struct rte_flow_error error;
1473 int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
1474
1475 if (ret == 0) {
1476 ufid_to_rte_flow_disassociate(ufid);
1477 VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR
1478 " flow destroy %d ufid " UUID_FMT,
1479 netdev_get_name(netdev), (intptr_t) rte_flow,
1480 netdev_dpdk_get_port_id(netdev),
1481 UUID_ARGS((struct uuid *) ufid));
1482 } else {
1483 VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT,
1484 netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev),
1485 UUID_ARGS((struct uuid *) ufid));
1486 }
1487
1488 return ret;
1489 }
1490
1491 static int
1492 netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
1493 struct nlattr *actions, size_t actions_len,
1494 const ovs_u128 *ufid, struct offload_info *info,
1495 struct dpif_flow_stats *stats)
1496 {
1497 struct ufid_to_rte_flow_data *rte_flow_data;
1498 int ret;
1499
1500 /*
1501 * If an old rte_flow exists, it means it's a flow modification.
1502      * In that case, destroy the old rte_flow before adding the new one.
1503 */
1504 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1505 if (rte_flow_data && rte_flow_data->rte_flow) {
1506 ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
1507 rte_flow_data->rte_flow);
1508 if (ret < 0) {
1509 return ret;
1510 }
1511 }
1512
1513 if (stats) {
1514 memset(stats, 0, sizeof *stats);
1515 }
1516 return netdev_offload_dpdk_add_flow(netdev, match, actions,
1517 actions_len, ufid, info);
1518 }
1519
1520 static int
1521 netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
1522 struct dpif_flow_stats *stats)
1523 {
1524 struct ufid_to_rte_flow_data *rte_flow_data;
1525
1526 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1527 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1528 return -1;
1529 }
1530
1531 if (stats) {
1532 memset(stats, 0, sizeof *stats);
1533 }
1534 return netdev_offload_dpdk_destroy_flow(netdev, ufid,
1535 rte_flow_data->rte_flow);
1536 }
1537
1538 static int
1539 netdev_offload_dpdk_init_flow_api(struct netdev *netdev)
1540 {
1541 return netdev_dpdk_flow_api_supported(netdev) ? 0 : EOPNOTSUPP;
1542 }
1543
1544 static int
1545 netdev_offload_dpdk_flow_get(struct netdev *netdev,
1546 struct match *match OVS_UNUSED,
1547 struct nlattr **actions OVS_UNUSED,
1548 const ovs_u128 *ufid,
1549 struct dpif_flow_stats *stats,
1550 struct dpif_flow_attrs *attrs,
1551 struct ofpbuf *buf OVS_UNUSED)
1552 {
1553 struct rte_flow_query_count query = { .reset = 1 };
1554 struct ufid_to_rte_flow_data *rte_flow_data;
1555 struct rte_flow_error error;
1556 int ret = 0;
1557
1558 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1559 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1560 ret = -1;
1561 goto out;
1562 }
1563
1564 attrs->offloaded = true;
1565 if (!rte_flow_data->actions_offloaded) {
1566 attrs->dp_layer = "ovs";
1567 memset(stats, 0, sizeof *stats);
1568 goto out;
1569 }
1570 attrs->dp_layer = "dpdk";
1571 ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
1572 &query, &error);
1573 if (ret) {
1574 VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p",
1575 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid),
1576 rte_flow_data->rte_flow);
1577 goto out;
1578 }
1579 rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
1580 rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
1581 if (query.hits_set && query.hits) {
1582 rte_flow_data->stats.used = time_msec();
1583 }
1584 memcpy(stats, &rte_flow_data->stats, sizeof *stats);
1585 out:
1586 attrs->dp_extra_info = NULL;
1587 return ret;
1588 }
1589
1590 const struct netdev_flow_api netdev_offload_dpdk = {
1591 .type = "dpdk_flow_api",
1592 .flow_put = netdev_offload_dpdk_flow_put,
1593 .flow_del = netdev_offload_dpdk_flow_del,
1594 .init_flow_api = netdev_offload_dpdk_init_flow_api,
1595 .flow_get = netdev_offload_dpdk_flow_get,
1596 };
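/*
 * Illustrative note (not part of the upstream file): this provider is hooked
 * into the generic netdev-offload layer, roughly via
 *
 *     netdev_register_flow_api_provider(&netdev_offload_dpdk);
 *
 * (the exact registration point is an assumption here), after which
 * netdev_flow_put()/netdev_flow_del()/netdev_flow_get() on a DPDK netdev with
 * hw-offload enabled dispatch into the callbacks above.
 */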