lib/netdev-offload-dpdk.c
1 /*
2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 * Copyright (c) 2019 Mellanox Technologies, Ltd.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 #include <config.h>
18
19 #include <sys/types.h>
20 #include <netinet/ip6.h>
21 #include <rte_flow.h>
22
23 #include "cmap.h"
24 #include "dpif-netdev.h"
25 #include "netdev-offload-provider.h"
26 #include "netdev-provider.h"
27 #include "openvswitch/match.h"
28 #include "openvswitch/vlog.h"
29 #include "packets.h"
30 #include "uuid.h"
31
32 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
33 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
34
35 /* Thread-safety
36 * =============
37 *
 38  * The API below is NOT thread safe in the following terms:
39 *
40 * - The caller must be sure that none of these functions will be called
41 * simultaneously. Even for different 'netdev's.
42 *
43 * - The caller must be sure that 'netdev' will not be destructed/deallocated.
44 *
45 * - The caller must be sure that 'netdev' configuration will not be changed.
46 * For example, simultaneous call of 'netdev_reconfigure()' for the same
47 * 'netdev' is forbidden.
48 *
 49  * In the current implementation, all of the above restrictions can be fulfilled by
50 * taking the datapath 'port_mutex' in lib/dpif-netdev.c. */
51
52 /*
53 * A mapping from ufid to dpdk rte_flow.
54 */
55 static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;
56
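/* A single mapping entry in 'ufid_to_rte_flow': the offloaded rte_flow handle
 * for 'ufid', whether the actions were fully offloaded or only a MARK+RSS
 * (partial offload) rule was installed, and the accumulated statistics. */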
57 struct ufid_to_rte_flow_data {
58 struct cmap_node node;
59 ovs_u128 ufid;
60 struct rte_flow *rte_flow;
61 bool actions_offloaded;
62 struct dpif_flow_stats stats;
63 };
64
65 /* Find rte_flow with @ufid. */
66 static struct ufid_to_rte_flow_data *
67 ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
68 {
69 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
70 struct ufid_to_rte_flow_data *data;
71
72 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
73 if (ovs_u128_equals(*ufid, data->ufid)) {
74 return data;
75 }
76 }
77
78 return NULL;
79 }
80
81 static inline struct ufid_to_rte_flow_data *
82 ufid_to_rte_flow_associate(const ovs_u128 *ufid,
83 struct rte_flow *rte_flow, bool actions_offloaded)
84 {
85 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
86 struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
87 struct ufid_to_rte_flow_data *data_prev;
88
89 /*
90 * We should not simply overwrite an existing rte flow.
91 * We should have deleted it first before re-adding it.
 92      * Thus, if the following assert triggers, something is wrong:
93 * the rte_flow is not destroyed.
94 */
95 data_prev = ufid_to_rte_flow_data_find(ufid);
96 if (data_prev) {
97 ovs_assert(data_prev->rte_flow == NULL);
98 }
99
100 data->ufid = *ufid;
101 data->rte_flow = rte_flow;
102 data->actions_offloaded = actions_offloaded;
103
104 cmap_insert(&ufid_to_rte_flow,
105 CONST_CAST(struct cmap_node *, &data->node), hash);
106 return data;
107 }
108
109 static inline void
110 ufid_to_rte_flow_disassociate(const ovs_u128 *ufid)
111 {
112 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
113 struct ufid_to_rte_flow_data *data;
114
115 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
116 if (ovs_u128_equals(*ufid, data->ufid)) {
117 cmap_remove(&ufid_to_rte_flow,
118 CONST_CAST(struct cmap_node *, &data->node), hash);
119 ovsrcu_postpone(free, data);
120 return;
121 }
122 }
123
124 VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow",
125 UUID_ARGS((struct uuid *) ufid));
126 }
127
128 /*
129  * To avoid individual xrealloc calls for each new element, a 'current_max'
130  * is used to keep track of the current allocated number of elements. It
131  * starts at 8 and doubles on each xrealloc call.
132 */
133 struct flow_patterns {
134 struct rte_flow_item *items;
135 int cnt;
136 int current_max;
137 };
138
139 struct flow_actions {
140 struct rte_flow_action *actions;
141 int cnt;
142 int current_max;
143 };
144
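/* Appends a 'testpmd command'-like dump of the flow attributes to 's'. */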
145 static void
146 dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
147 {
148 ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
149 attr->ingress ? "ingress " : "",
150 attr->egress ? "egress " : "", attr->priority, attr->group,
151 attr->transfer ? "transfer " : "");
152 }
153
154 /* Adds one pattern item 'field' with the 'mask' to the dynamic string 's'
155  * using a 'testpmd command'-like format. */
156 #define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \
157 if (is_all_ones(&mask, sizeof mask)) { \
158 ds_put_format(s, field " is " fmt " ", spec_pri); \
159 } else if (!is_all_zeros(&mask, sizeof mask)) { \
160 ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \
161 spec_pri, mask_pri); \
162 }
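/* For example, a fully masked IPv4 source yields "src is 1.2.3.4 ", a
 * partially masked one yields "src spec 1.2.3.4 src mask 255.255.0.0 ", and an
 * all-zero mask emits nothing. */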
163
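/* Appends a dump of a single rte_flow pattern item (spec and mask) to 's'. */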
164 static void
165 dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
166 {
167 if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
168 const struct rte_flow_item_eth *eth_spec = item->spec;
169 const struct rte_flow_item_eth *eth_mask = item->mask;
170
171 ds_put_cstr(s, "eth ");
172 if (eth_spec) {
173 if (!eth_mask) {
174 eth_mask = &rte_flow_item_eth_mask;
175 }
176 DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT,
177 ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
178 ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes));
179 DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT,
180 ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
181 ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes));
182 DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
183 ntohs(eth_spec->type),
184 ntohs(eth_mask->type));
185 }
186 ds_put_cstr(s, "/ ");
187 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
188 const struct rte_flow_item_vlan *vlan_spec = item->spec;
189 const struct rte_flow_item_vlan *vlan_mask = item->mask;
190
191 ds_put_cstr(s, "vlan ");
192 if (vlan_spec) {
193 if (!vlan_mask) {
194 vlan_mask = &rte_flow_item_vlan_mask;
195 }
196 DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type", "0x%"PRIx16,
197 ntohs(vlan_spec->inner_type),
198 ntohs(vlan_mask->inner_type));
199 DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16,
200 ntohs(vlan_spec->tci), ntohs(vlan_mask->tci));
201 }
202 ds_put_cstr(s, "/ ");
203 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
204 const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
205 const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
206
207 ds_put_cstr(s, "ipv4 ");
208 if (ipv4_spec) {
209 if (!ipv4_mask) {
210 ipv4_mask = &rte_flow_item_ipv4_mask;
211 }
212 DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT,
213 IP_ARGS(ipv4_spec->hdr.src_addr),
214 IP_ARGS(ipv4_mask->hdr.src_addr));
215 DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT,
216 IP_ARGS(ipv4_spec->hdr.dst_addr),
217 IP_ARGS(ipv4_mask->hdr.dst_addr));
218 DUMP_PATTERN_ITEM(ipv4_mask->hdr.next_proto_id, "proto",
219 "0x%"PRIx8, ipv4_spec->hdr.next_proto_id,
220 ipv4_mask->hdr.next_proto_id);
221 DUMP_PATTERN_ITEM(ipv4_mask->hdr.type_of_service, "tos",
222 "0x%"PRIx8, ipv4_spec->hdr.type_of_service,
223 ipv4_mask->hdr.type_of_service);
224 DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, "ttl",
225 "0x%"PRIx8, ipv4_spec->hdr.time_to_live,
226 ipv4_mask->hdr.time_to_live);
227 }
228 ds_put_cstr(s, "/ ");
229 } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
230 const struct rte_flow_item_udp *udp_spec = item->spec;
231 const struct rte_flow_item_udp *udp_mask = item->mask;
232
233 ds_put_cstr(s, "udp ");
234 if (udp_spec) {
235 if (!udp_mask) {
236 udp_mask = &rte_flow_item_udp_mask;
237 }
238 DUMP_PATTERN_ITEM(udp_mask->hdr.src_port, "src", "%"PRIu16,
239 ntohs(udp_spec->hdr.src_port),
240 ntohs(udp_mask->hdr.src_port));
241 DUMP_PATTERN_ITEM(udp_mask->hdr.dst_port, "dst", "%"PRIu16,
242 ntohs(udp_spec->hdr.dst_port),
243 ntohs(udp_mask->hdr.dst_port));
244 }
245 ds_put_cstr(s, "/ ");
246 } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) {
247 const struct rte_flow_item_sctp *sctp_spec = item->spec;
248 const struct rte_flow_item_sctp *sctp_mask = item->mask;
249
250 ds_put_cstr(s, "sctp ");
251 if (sctp_spec) {
252 if (!sctp_mask) {
253 sctp_mask = &rte_flow_item_sctp_mask;
254 }
255 DUMP_PATTERN_ITEM(sctp_mask->hdr.src_port, "src", "%"PRIu16,
256 ntohs(sctp_spec->hdr.src_port),
257 ntohs(sctp_mask->hdr.src_port));
258 DUMP_PATTERN_ITEM(sctp_mask->hdr.dst_port, "dst", "%"PRIu16,
259 ntohs(sctp_spec->hdr.dst_port),
260 ntohs(sctp_mask->hdr.dst_port));
261 }
262 ds_put_cstr(s, "/ ");
263 } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) {
264 const struct rte_flow_item_icmp *icmp_spec = item->spec;
265 const struct rte_flow_item_icmp *icmp_mask = item->mask;
266
267 ds_put_cstr(s, "icmp ");
268 if (icmp_spec) {
269 if (!icmp_mask) {
270 icmp_mask = &rte_flow_item_icmp_mask;
271 }
272 DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_type, "icmp_type", "%"PRIu8,
273 icmp_spec->hdr.icmp_type,
274 icmp_mask->hdr.icmp_type);
275 DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_code, "icmp_code", "%"PRIu8,
276 icmp_spec->hdr.icmp_code,
277 icmp_mask->hdr.icmp_code);
278 }
279 ds_put_cstr(s, "/ ");
280 } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
281 const struct rte_flow_item_tcp *tcp_spec = item->spec;
282 const struct rte_flow_item_tcp *tcp_mask = item->mask;
283
284 ds_put_cstr(s, "tcp ");
285 if (tcp_spec) {
286 if (!tcp_mask) {
287 tcp_mask = &rte_flow_item_tcp_mask;
288 }
289 DUMP_PATTERN_ITEM(tcp_mask->hdr.src_port, "src", "%"PRIu16,
290 ntohs(tcp_spec->hdr.src_port),
291 ntohs(tcp_mask->hdr.src_port));
292 DUMP_PATTERN_ITEM(tcp_mask->hdr.dst_port, "dst", "%"PRIu16,
293 ntohs(tcp_spec->hdr.dst_port),
294 ntohs(tcp_mask->hdr.dst_port));
295 DUMP_PATTERN_ITEM(tcp_mask->hdr.tcp_flags, "flags", "0x%"PRIx8,
296 tcp_spec->hdr.tcp_flags,
297 tcp_mask->hdr.tcp_flags);
298 }
299 ds_put_cstr(s, "/ ");
300 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6) {
301 const struct rte_flow_item_ipv6 *ipv6_spec = item->spec;
302 const struct rte_flow_item_ipv6 *ipv6_mask = item->mask;
303
304 char addr_str[INET6_ADDRSTRLEN];
305 char mask_str[INET6_ADDRSTRLEN];
306 struct in6_addr addr, mask;
307
308 ds_put_cstr(s, "ipv6 ");
309 if (ipv6_spec) {
310 if (!ipv6_mask) {
311 ipv6_mask = &rte_flow_item_ipv6_mask;
312 }
313 memcpy(&addr, ipv6_spec->hdr.src_addr, sizeof addr);
314 memcpy(&mask, ipv6_mask->hdr.src_addr, sizeof mask);
315 ipv6_string_mapped(addr_str, &addr);
316 ipv6_string_mapped(mask_str, &mask);
317 DUMP_PATTERN_ITEM(mask, "src", "%s", addr_str, mask_str);
318
319 memcpy(&addr, ipv6_spec->hdr.dst_addr, sizeof addr);
320 memcpy(&mask, ipv6_mask->hdr.dst_addr, sizeof mask);
321 ipv6_string_mapped(addr_str, &addr);
322 ipv6_string_mapped(mask_str, &mask);
323 DUMP_PATTERN_ITEM(mask, "dst", "%s", addr_str, mask_str);
324
325 DUMP_PATTERN_ITEM(ipv6_mask->hdr.proto, "proto", "%"PRIu8,
326 ipv6_spec->hdr.proto, ipv6_mask->hdr.proto);
327 DUMP_PATTERN_ITEM(ipv6_mask->hdr.vtc_flow, "tc", "0x%"PRIx32,
328 ntohl(ipv6_spec->hdr.vtc_flow),
329 ntohl(ipv6_mask->hdr.vtc_flow));
330 DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, "hop", "%"PRIu8,
331 ipv6_spec->hdr.hop_limits,
332 ipv6_mask->hdr.hop_limits);
333 }
334 ds_put_cstr(s, "/ ");
335 } else {
336 ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
337 }
338 }
339
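/* Dumps the pattern items of a VXLAN_ENCAP action as a testpmd
 * 'set vxlan ...' command into 's'. */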
340 static void
341 dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
342 {
343 const struct rte_flow_item_eth *eth = NULL;
344 const struct rte_flow_item_ipv4 *ipv4 = NULL;
345 const struct rte_flow_item_ipv6 *ipv6 = NULL;
346 const struct rte_flow_item_udp *udp = NULL;
347 const struct rte_flow_item_vxlan *vxlan = NULL;
348
349 for (; items && items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
350 if (items->type == RTE_FLOW_ITEM_TYPE_ETH) {
351 eth = items->spec;
352 } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV4) {
353 ipv4 = items->spec;
354 } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV6) {
355 ipv6 = items->spec;
356 } else if (items->type == RTE_FLOW_ITEM_TYPE_UDP) {
357 udp = items->spec;
358 } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
359 vxlan = items->spec;
360 }
361 }
362
363 ds_put_format(s, "set vxlan ip-version %s ",
364 ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
365 if (vxlan) {
366 ds_put_format(s, "vni %"PRIu32" ",
367 ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
368 }
369 if (udp) {
370 ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
371 ntohs(udp->hdr.src_port), ntohs(udp->hdr.dst_port));
372 }
373 if (ipv4) {
374 ds_put_format(s, "ip-src "IP_FMT" ip-dst "IP_FMT" ",
375 IP_ARGS(ipv4->hdr.src_addr),
376 IP_ARGS(ipv4->hdr.dst_addr));
377 }
378 if (ipv6) {
379 struct in6_addr addr;
380
381 ds_put_cstr(s, "ip-src ");
382 memcpy(&addr, ipv6->hdr.src_addr, sizeof addr);
383 ipv6_format_mapped(&addr, s);
384 ds_put_cstr(s, " ip-dst ");
385 memcpy(&addr, ipv6->hdr.dst_addr, sizeof addr);
386 ipv6_format_mapped(&addr, s);
387 ds_put_cstr(s, " ");
388 }
389 if (eth) {
390 ds_put_format(s, "eth-src "ETH_ADDR_FMT" eth-dst "ETH_ADDR_FMT,
391 ETH_ADDR_BYTES_ARGS(eth->src.addr_bytes),
392 ETH_ADDR_BYTES_ARGS(eth->dst.addr_bytes));
393 }
394 }
395
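/* Appends a dump of a single rte_flow action to 's'. Actions that need a
 * separate testpmd 'set' command (raw_encap, vxlan_encap) dump it into
 * 's_extra'. */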
396 static void
397 dump_flow_action(struct ds *s, struct ds *s_extra,
398 const struct rte_flow_action *actions)
399 {
400 if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
401 const struct rte_flow_action_mark *mark = actions->conf;
402
403 ds_put_cstr(s, "mark ");
404 if (mark) {
405 ds_put_format(s, "id %d ", mark->id);
406 }
407 ds_put_cstr(s, "/ ");
408 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
409 ds_put_cstr(s, "rss / ");
410 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
411 ds_put_cstr(s, "count / ");
412 } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
413 const struct rte_flow_action_port_id *port_id = actions->conf;
414
415 ds_put_cstr(s, "port_id ");
416 if (port_id) {
417 ds_put_format(s, "original %d id %d ",
418 port_id->original, port_id->id);
419 }
420 ds_put_cstr(s, "/ ");
421 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
422 ds_put_cstr(s, "drop / ");
423 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ||
424 actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) {
425 const struct rte_flow_action_set_mac *set_mac = actions->conf;
426
427 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST
428 ? "dst" : "src";
429
430 ds_put_format(s, "set_mac_%s ", dirstr);
431 if (set_mac) {
432 ds_put_format(s, "mac_addr "ETH_ADDR_FMT" ",
433 ETH_ADDR_BYTES_ARGS(set_mac->mac_addr));
434 }
435 ds_put_cstr(s, "/ ");
436 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ||
437 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) {
438 const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf;
439 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
440 ? "dst" : "src";
441
442 ds_put_format(s, "set_ipv4_%s ", dirstr);
443 if (set_ipv4) {
444 ds_put_format(s, "ipv4_addr "IP_FMT" ",
445 IP_ARGS(set_ipv4->ipv4_addr));
446 }
447 ds_put_cstr(s, "/ ");
448 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) {
449 const struct rte_flow_action_set_ttl *set_ttl = actions->conf;
450
451 ds_put_cstr(s, "set_ttl ");
452 if (set_ttl) {
453 ds_put_format(s, "ttl_value %d ", set_ttl->ttl_value);
454 }
455 ds_put_cstr(s, "/ ");
456 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ||
457 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) {
458 const struct rte_flow_action_set_tp *set_tp = actions->conf;
459 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST
460 ? "dst" : "src";
461
462 ds_put_format(s, "set_tp_%s ", dirstr);
463 if (set_tp) {
464 ds_put_format(s, "port %"PRIu16" ", ntohs(set_tp->port));
465 }
466 ds_put_cstr(s, "/ ");
467 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) {
468 const struct rte_flow_action_of_push_vlan *of_push_vlan =
469 actions->conf;
470
471 ds_put_cstr(s, "of_push_vlan ");
472 if (of_push_vlan) {
473 ds_put_format(s, "ethertype 0x%"PRIx16" ",
474 ntohs(of_push_vlan->ethertype));
475 }
476 ds_put_cstr(s, "/ ");
477 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
478 const struct rte_flow_action_of_set_vlan_pcp *of_set_vlan_pcp =
479 actions->conf;
480
481 ds_put_cstr(s, "of_set_vlan_pcp ");
482 if (of_set_vlan_pcp) {
483 ds_put_format(s, "vlan_pcp %"PRIu8" ", of_set_vlan_pcp->vlan_pcp);
484 }
485 ds_put_cstr(s, "/ ");
486 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
487 const struct rte_flow_action_of_set_vlan_vid *of_set_vlan_vid =
488 actions->conf;
489
490 ds_put_cstr(s, "of_set_vlan_vid ");
491 if (of_set_vlan_vid) {
492 ds_put_format(s, "vlan_vid %"PRIu16" ",
493 ntohs(of_set_vlan_vid->vlan_vid));
494 }
495 ds_put_cstr(s, "/ ");
496 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) {
497 ds_put_cstr(s, "of_pop_vlan / ");
498 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ||
499 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) {
500 const struct rte_flow_action_set_ipv6 *set_ipv6 = actions->conf;
501
502 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
503 ? "dst" : "src";
504
505 ds_put_format(s, "set_ipv6_%s ", dirstr);
506 if (set_ipv6) {
507 ds_put_cstr(s, "ipv6_addr ");
508 ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s);
509 ds_put_cstr(s, " ");
510 }
511 ds_put_cstr(s, "/ ");
512 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
513 const struct rte_flow_action_raw_encap *raw_encap = actions->conf;
514
515 ds_put_cstr(s, "raw_encap index 0 / ");
516 if (raw_encap) {
517             ds_put_format(s_extra, "Raw-encap size=%"PRIuSIZE" set raw_encap 0 raw "
518                           "pattern is ", raw_encap->size);
519 for (int i = 0; i < raw_encap->size; i++) {
520 ds_put_format(s_extra, "%02x", raw_encap->data[i]);
521 }
522 ds_put_cstr(s_extra, " / end_set;");
523 }
524 } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
525 const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
526 const struct rte_flow_item *items = vxlan_encap->definition;
527
528 ds_put_cstr(s, "vxlan_encap / ");
529 dump_vxlan_encap(s_extra, items);
530 ds_put_cstr(s_extra, ";");
531 } else {
532 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
533 }
534 }
535
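/* Dumps the whole flow (attributes, pattern items and actions) into 's' in a
 * format close to a testpmd 'flow create' command. */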
536 static struct ds *
537 dump_flow(struct ds *s, struct ds *s_extra,
538 const struct rte_flow_attr *attr,
539 const struct rte_flow_item *items,
540 const struct rte_flow_action *actions)
541 {
542 if (attr) {
543 dump_flow_attr(s, attr);
544 }
545 ds_put_cstr(s, "pattern ");
546 while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
547 dump_flow_pattern(s, items++);
548 }
549 ds_put_cstr(s, "end actions ");
550 while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
551 dump_flow_action(s, s_extra, actions++);
552 }
553 ds_put_cstr(s, "end");
554 return s;
555 }
556
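/* Creates an rte_flow on 'netdev' and logs the outcome: the created flow at
 * DBG level on success, or the error and a dump of the requested flow on
 * failure. */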
557 static struct rte_flow *
558 netdev_offload_dpdk_flow_create(struct netdev *netdev,
559 const struct rte_flow_attr *attr,
560 const struct rte_flow_item *items,
561 const struct rte_flow_action *actions,
562 struct rte_flow_error *error)
563 {
564 struct ds s_extra = DS_EMPTY_INITIALIZER;
565 struct ds s = DS_EMPTY_INITIALIZER;
566 struct rte_flow *flow;
567 char *extra_str;
568
569 flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
570 if (flow) {
571 if (!VLOG_DROP_DBG(&rl)) {
572 dump_flow(&s, &s_extra, attr, items, actions);
573 extra_str = ds_cstr(&s_extra);
574 VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
575 netdev_get_name(netdev), (intptr_t) flow, extra_str,
576 netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
577 }
578 } else {
579 enum vlog_level level = VLL_WARN;
580
581 if (error->type == RTE_FLOW_ERROR_TYPE_ACTION) {
582 level = VLL_DBG;
583 }
584 VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
585 netdev_get_name(netdev), error->type, error->message);
586 if (!vlog_should_drop(&this_module, level, &rl)) {
587 dump_flow(&s, &s_extra, attr, items, actions);
588 extra_str = ds_cstr(&s_extra);
589 VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
590 netdev_get_name(netdev), extra_str,
591 netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
592 }
593 }
594 ds_destroy(&s);
595 ds_destroy(&s_extra);
596 return flow;
597 }
598
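/* Appends one pattern item to 'patterns', growing the array as needed
 * (starting at 8 entries and doubling on each reallocation). */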
599 static void
600 add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
601 const void *spec, const void *mask)
602 {
603 int cnt = patterns->cnt;
604
605 if (cnt == 0) {
606 patterns->current_max = 8;
607 patterns->items = xcalloc(patterns->current_max,
608 sizeof *patterns->items);
609 } else if (cnt == patterns->current_max) {
610 patterns->current_max *= 2;
611 patterns->items = xrealloc(patterns->items, patterns->current_max *
612 sizeof *patterns->items);
613 }
614
615 patterns->items[cnt].type = type;
616 patterns->items[cnt].spec = spec;
617 patterns->items[cnt].mask = mask;
618 patterns->items[cnt].last = NULL;
619 patterns->cnt++;
620 }
621
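/* Appends one action to 'actions', growing the array in the same way as
 * add_flow_pattern(). */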
622 static void
623 add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
624 const void *conf)
625 {
626 int cnt = actions->cnt;
627
628 if (cnt == 0) {
629 actions->current_max = 8;
630 actions->actions = xcalloc(actions->current_max,
631 sizeof *actions->actions);
632 } else if (cnt == actions->current_max) {
633 actions->current_max *= 2;
634 actions->actions = xrealloc(actions->actions, actions->current_max *
635 sizeof *actions->actions);
636 }
637
638 actions->actions[cnt].type = type;
639 actions->actions[cnt].conf = conf;
640 actions->cnt++;
641 }
642
643 static void
644 free_flow_patterns(struct flow_patterns *patterns)
645 {
646 int i;
647
648 for (i = 0; i < patterns->cnt; i++) {
649 if (patterns->items[i].spec) {
650 free(CONST_CAST(void *, patterns->items[i].spec));
651 }
652 if (patterns->items[i].mask) {
653 free(CONST_CAST(void *, patterns->items[i].mask));
654 }
655 }
656 free(patterns->items);
657 patterns->items = NULL;
658 patterns->cnt = 0;
659 }
660
661 static void
662 free_flow_actions(struct flow_actions *actions)
663 {
664 int i;
665
666 for (i = 0; i < actions->cnt; i++) {
667 if (actions->actions[i].conf) {
668 free(CONST_CAST(void *, actions->actions[i].conf));
669 }
670 }
671 free(actions->actions);
672 actions->actions = NULL;
673 actions->cnt = 0;
674 }
675
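/* Translates the OVS 'match' into a list of rte_flow pattern items. Returns 0
 * on success, or -1 if some part of the match cannot be expressed, in which
 * case the flow cannot be offloaded. */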
676 static int
677 parse_flow_match(struct flow_patterns *patterns,
678 struct match *match)
679 {
680 uint8_t *next_proto_mask = NULL;
681 struct flow *consumed_masks;
682 uint8_t proto = 0;
683
684 consumed_masks = &match->wc.masks;
685
686 if (!flow_tnl_dst_is_set(&match->flow.tunnel)) {
687 memset(&consumed_masks->tunnel, 0, sizeof consumed_masks->tunnel);
688 }
689
690 memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port);
691 /* recirc id must be zero. */
692 if (match->wc.masks.recirc_id & match->flow.recirc_id) {
693 return -1;
694 }
695 consumed_masks->recirc_id = 0;
696 consumed_masks->packet_type = 0;
697
698 /* Eth */
699 if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow)
700 && eth_addr_is_zero(match->wc.masks.dl_dst)
701 && eth_addr_is_zero(match->wc.masks.dl_src)) {
702 /*
703          * This is a temporary workaround to fix the ethernet pattern for
704          * partial hardware offload on X710 devices. This fix will be
705          * reverted once the issue is fixed within the i40e PMD driver.
706 */
707 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
708
709 memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
710 memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
711 consumed_masks->dl_type = 0;
712 } else if (match->wc.masks.dl_type ||
713 !eth_addr_is_zero(match->wc.masks.dl_src) ||
714 !eth_addr_is_zero(match->wc.masks.dl_dst)) {
715 struct rte_flow_item_eth *spec, *mask;
716
717 spec = xzalloc(sizeof *spec);
718 mask = xzalloc(sizeof *mask);
719
720 memcpy(&spec->dst, &match->flow.dl_dst, sizeof spec->dst);
721 memcpy(&spec->src, &match->flow.dl_src, sizeof spec->src);
722 spec->type = match->flow.dl_type;
723
724 memcpy(&mask->dst, &match->wc.masks.dl_dst, sizeof mask->dst);
725 memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src);
726 mask->type = match->wc.masks.dl_type;
727
728 memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
729 memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
730 consumed_masks->dl_type = 0;
731
732 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
733 }
734
735 /* VLAN */
736 if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
737 struct rte_flow_item_vlan *spec, *mask;
738
739 spec = xzalloc(sizeof *spec);
740 mask = xzalloc(sizeof *mask);
741
742 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
743 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
744
745         /* Match any protocol. */
746 mask->inner_type = 0;
747
748 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
749 }
750     /* For untagged matching, match->wc.masks.vlans[0].tci is 0xFFFF and
751      * match->flow.vlans[0].tci is 0. The mask must be consumed outside of the
752      * 'if' scope to handle that case.
753 */
754 memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]);
755
756 /* IP v4 */
757 if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
758 struct rte_flow_item_ipv4 *spec, *mask;
759
760 spec = xzalloc(sizeof *spec);
761 mask = xzalloc(sizeof *mask);
762
763 spec->hdr.type_of_service = match->flow.nw_tos;
764 spec->hdr.time_to_live = match->flow.nw_ttl;
765 spec->hdr.next_proto_id = match->flow.nw_proto;
766 spec->hdr.src_addr = match->flow.nw_src;
767 spec->hdr.dst_addr = match->flow.nw_dst;
768
769 mask->hdr.type_of_service = match->wc.masks.nw_tos;
770 mask->hdr.time_to_live = match->wc.masks.nw_ttl;
771 mask->hdr.next_proto_id = match->wc.masks.nw_proto;
772 mask->hdr.src_addr = match->wc.masks.nw_src;
773 mask->hdr.dst_addr = match->wc.masks.nw_dst;
774
775 consumed_masks->nw_tos = 0;
776 consumed_masks->nw_ttl = 0;
777 consumed_masks->nw_proto = 0;
778 consumed_masks->nw_src = 0;
779 consumed_masks->nw_dst = 0;
780
781 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask);
782
783 /* Save proto for L4 protocol setup. */
784 proto = spec->hdr.next_proto_id &
785 mask->hdr.next_proto_id;
786 next_proto_mask = &mask->hdr.next_proto_id;
787 }
788 /* If fragmented, then don't HW accelerate - for now. */
789 if (match->wc.masks.nw_frag & match->flow.nw_frag) {
790 return -1;
791 }
792 consumed_masks->nw_frag = 0;
793
794 /* IP v6 */
795 if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {
796 struct rte_flow_item_ipv6 *spec, *mask;
797
798 spec = xzalloc(sizeof *spec);
799 mask = xzalloc(sizeof *mask);
800
801 spec->hdr.proto = match->flow.nw_proto;
802 spec->hdr.hop_limits = match->flow.nw_ttl;
803 spec->hdr.vtc_flow =
804 htonl((uint32_t) match->flow.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
805 memcpy(spec->hdr.src_addr, &match->flow.ipv6_src,
806 sizeof spec->hdr.src_addr);
807 memcpy(spec->hdr.dst_addr, &match->flow.ipv6_dst,
808 sizeof spec->hdr.dst_addr);
809
810 mask->hdr.proto = match->wc.masks.nw_proto;
811 mask->hdr.hop_limits = match->wc.masks.nw_ttl;
812 mask->hdr.vtc_flow =
813 htonl((uint32_t) match->wc.masks.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
814 memcpy(mask->hdr.src_addr, &match->wc.masks.ipv6_src,
815 sizeof mask->hdr.src_addr);
816 memcpy(mask->hdr.dst_addr, &match->wc.masks.ipv6_dst,
817 sizeof mask->hdr.dst_addr);
818
819 consumed_masks->nw_proto = 0;
820 consumed_masks->nw_ttl = 0;
821 consumed_masks->nw_tos = 0;
822 memset(&consumed_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src);
823 memset(&consumed_masks->ipv6_dst, 0, sizeof consumed_masks->ipv6_dst);
824
825 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV6, spec, mask);
826
827 /* Save proto for L4 protocol setup. */
828 proto = spec->hdr.proto & mask->hdr.proto;
829 next_proto_mask = &mask->hdr.proto;
830 }
831
832 if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP &&
833 proto != IPPROTO_SCTP && proto != IPPROTO_TCP &&
834 (match->wc.masks.tp_src ||
835 match->wc.masks.tp_dst ||
836 match->wc.masks.tcp_flags)) {
837 VLOG_DBG("L4 Protocol (%u) not supported", proto);
838 return -1;
839 }
840
841 if (proto == IPPROTO_TCP) {
842 struct rte_flow_item_tcp *spec, *mask;
843
844 spec = xzalloc(sizeof *spec);
845 mask = xzalloc(sizeof *mask);
846
847 spec->hdr.src_port = match->flow.tp_src;
848 spec->hdr.dst_port = match->flow.tp_dst;
849 spec->hdr.data_off = ntohs(match->flow.tcp_flags) >> 8;
850 spec->hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff;
851
852 mask->hdr.src_port = match->wc.masks.tp_src;
853 mask->hdr.dst_port = match->wc.masks.tp_dst;
854 mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8;
855 mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;
856
857 consumed_masks->tp_src = 0;
858 consumed_masks->tp_dst = 0;
859 consumed_masks->tcp_flags = 0;
860
861 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask);
862
863 /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */
864 if (next_proto_mask) {
865 *next_proto_mask = 0;
866 }
867 } else if (proto == IPPROTO_UDP) {
868 struct rte_flow_item_udp *spec, *mask;
869
870 spec = xzalloc(sizeof *spec);
871 mask = xzalloc(sizeof *mask);
872
873 spec->hdr.src_port = match->flow.tp_src;
874 spec->hdr.dst_port = match->flow.tp_dst;
875
876 mask->hdr.src_port = match->wc.masks.tp_src;
877 mask->hdr.dst_port = match->wc.masks.tp_dst;
878
879 consumed_masks->tp_src = 0;
880 consumed_masks->tp_dst = 0;
881
882 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask);
883
884 /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */
885 if (next_proto_mask) {
886 *next_proto_mask = 0;
887 }
888 } else if (proto == IPPROTO_SCTP) {
889 struct rte_flow_item_sctp *spec, *mask;
890
891 spec = xzalloc(sizeof *spec);
892 mask = xzalloc(sizeof *mask);
893
894 spec->hdr.src_port = match->flow.tp_src;
895 spec->hdr.dst_port = match->flow.tp_dst;
896
897 mask->hdr.src_port = match->wc.masks.tp_src;
898 mask->hdr.dst_port = match->wc.masks.tp_dst;
899
900 consumed_masks->tp_src = 0;
901 consumed_masks->tp_dst = 0;
902
903 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask);
904
905 /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */
906 if (next_proto_mask) {
907 *next_proto_mask = 0;
908 }
909 } else if (proto == IPPROTO_ICMP) {
910 struct rte_flow_item_icmp *spec, *mask;
911
912 spec = xzalloc(sizeof *spec);
913 mask = xzalloc(sizeof *mask);
914
915 spec->hdr.icmp_type = (uint8_t) ntohs(match->flow.tp_src);
916 spec->hdr.icmp_code = (uint8_t) ntohs(match->flow.tp_dst);
917
918 mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src);
919 mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst);
920
921 consumed_masks->tp_src = 0;
922 consumed_masks->tp_dst = 0;
923
924 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask);
925
926 /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */
927 if (next_proto_mask) {
928 *next_proto_mask = 0;
929 }
930 }
931
932 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL);
933
934 if (!is_all_zeros(consumed_masks, sizeof *consumed_masks)) {
935 return -1;
936 }
937 return 0;
938 }
939
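/* Adds a MARK action carrying 'flow_mark' and an RSS action spreading packets
 * over all rx queues of 'netdev'. This is the partial-offload action set:
 * matching packets are marked in hardware while the actions are still
 * executed in software. */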
940 static void
941 add_flow_mark_rss_actions(struct flow_actions *actions,
942 uint32_t flow_mark,
943 const struct netdev *netdev)
944 {
945 struct rte_flow_action_mark *mark;
946 struct action_rss_data {
947 struct rte_flow_action_rss conf;
948 uint16_t queue[0];
949 } *rss_data;
950 BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0);
951 int i;
952
953 mark = xzalloc(sizeof *mark);
954
955 mark->id = flow_mark;
956 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark);
957
958 rss_data = xmalloc(sizeof *rss_data +
959 netdev_n_rxq(netdev) * sizeof rss_data->queue[0]);
960 *rss_data = (struct action_rss_data) {
961 .conf = (struct rte_flow_action_rss) {
962 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
963 .level = 0,
964 .types = 0,
965 .queue_num = netdev_n_rxq(netdev),
966 .queue = rss_data->queue,
967 .key_len = 0,
968 .key = NULL
969 },
970 };
971
972 /* Override queue array with default. */
973 for (i = 0; i < netdev_n_rxq(netdev); i++) {
974 rss_data->queue[i] = i;
975 }
976
977 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
978 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
979 }
980
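/* Creates a MARK + RSS rte_flow for 'patterns' on 'netdev', used as the
 * partial-offload fallback. */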
981 static struct rte_flow *
982 netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
983 struct netdev *netdev,
984 uint32_t flow_mark)
985 {
986 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
987 const struct rte_flow_attr flow_attr = {
988 .group = 0,
989 .priority = 0,
990 .ingress = 1,
991 .egress = 0
992 };
993 struct rte_flow_error error;
994 struct rte_flow *flow;
995
996 add_flow_mark_rss_actions(&actions, flow_mark, netdev);
997
998 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
999 actions.actions, &error);
1000
1001 free_flow_actions(&actions);
1002 return flow;
1003 }
1004
1005 static void
1006 add_count_action(struct flow_actions *actions)
1007 {
1008 struct rte_flow_action_count *count = xzalloc(sizeof *count);
1009
1010 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
1011 }
1012
1013 static int
1014 add_port_id_action(struct flow_actions *actions,
1015 struct netdev *outdev)
1016 {
1017 struct rte_flow_action_port_id *port_id;
1018 int outdev_id;
1019
1020 outdev_id = netdev_dpdk_get_port_id(outdev);
1021 if (outdev_id < 0) {
1022 return -1;
1023 }
1024 port_id = xzalloc(sizeof *port_id);
1025 port_id->id = outdev_id;
1026 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
1027 return 0;
1028 }
1029
1030 static int
1031 add_output_action(struct netdev *netdev,
1032 struct flow_actions *actions,
1033 const struct nlattr *nla)
1034 {
1035 struct netdev *outdev;
1036 odp_port_t port;
1037 int ret = 0;
1038
1039 port = nl_attr_get_odp_port(nla);
1040 outdev = netdev_ports_get(port, netdev->dpif_type);
1041 if (outdev == NULL) {
1042 VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port);
1043 return -1;
1044 }
1045 if (!netdev_flow_api_equals(netdev, outdev) ||
1046 add_port_id_action(actions, outdev)) {
1047 VLOG_DBG_RL(&rl, "%s: Output to port \'%s\' cannot be offloaded.",
1048 netdev_get_name(netdev), netdev_get_name(outdev));
1049 ret = -1;
1050 }
1051 netdev_close(outdev);
1052 return ret;
1053 }
1054
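/* Adds a 'set' action of type 'attr' for a fully masked field and clears the
 * consumed mask. An all-zero mask makes this a no-op; a partial mask fails
 * the offload. */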
1055 static int
1056 add_set_flow_action__(struct flow_actions *actions,
1057 const void *value, void *mask,
1058 const size_t size, const int attr)
1059 {
1060 void *spec;
1061
1062 if (mask) {
1063 /* DPDK does not support partially masked set actions. In such
1064          * a case, fail the offload.
1065 */
1066 if (is_all_zeros(mask, size)) {
1067 return 0;
1068 }
1069 if (!is_all_ones(mask, size)) {
1070 VLOG_DBG_RL(&rl, "Partial mask is not supported");
1071 return -1;
1072 }
1073 }
1074
1075 spec = xzalloc(size);
1076 memcpy(spec, value, size);
1077 add_flow_action(actions, attr, spec);
1078
1079 /* Clear used mask for later checking. */
1080 if (mask) {
1081 memset(mask, 0, size);
1082 }
1083 return 0;
1084 }
1085
1086 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
1087 MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src));
1088 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
1089 MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst));
1090 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
1091 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src));
1092 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
1093 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
1094 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
1095 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
1096 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
1097 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_src));
1098 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
1099 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_dst));
1100 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
1101 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_hlimit));
1102 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1103 MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src));
1104 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1105 MEMBER_SIZEOF(struct ovs_key_tcp, tcp_dst));
1106 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1107 MEMBER_SIZEOF(struct ovs_key_udp, udp_src));
1108 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1109 MEMBER_SIZEOF(struct ovs_key_udp, udp_dst));
1110
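/* Translates OVS_ACTION_ATTR_SET(_MASKED) netlink attributes into rte_flow
 * 'set' actions. Returns -1 if any field cannot be offloaded. */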
1111 static int
1112 parse_set_actions(struct flow_actions *actions,
1113 const struct nlattr *set_actions,
1114 const size_t set_actions_len,
1115 bool masked)
1116 {
1117 const struct nlattr *sa;
1118 unsigned int sleft;
1119
1120 #define add_set_flow_action(field, type) \
1121 if (add_set_flow_action__(actions, &key->field, \
1122 mask ? CONST_CAST(void *, &mask->field) : NULL, \
1123 sizeof key->field, type)) { \
1124 return -1; \
1125 }
1126
1127 NL_ATTR_FOR_EACH_UNSAFE (sa, sleft, set_actions, set_actions_len) {
1128 if (nl_attr_type(sa) == OVS_KEY_ATTR_ETHERNET) {
1129 const struct ovs_key_ethernet *key = nl_attr_get(sa);
1130 const struct ovs_key_ethernet *mask = masked ? key + 1 : NULL;
1131
1132 add_set_flow_action(eth_src, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC);
1133 add_set_flow_action(eth_dst, RTE_FLOW_ACTION_TYPE_SET_MAC_DST);
1134
1135 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1136 VLOG_DBG_RL(&rl, "Unsupported ETHERNET set action");
1137 return -1;
1138 }
1139 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV4) {
1140 const struct ovs_key_ipv4 *key = nl_attr_get(sa);
1141 const struct ovs_key_ipv4 *mask = masked ? key + 1 : NULL;
1142
1143 add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC);
1144 add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST);
1145 add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL);
1146
1147 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1148 VLOG_DBG_RL(&rl, "Unsupported IPv4 set action");
1149 return -1;
1150 }
1151 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV6) {
1152 const struct ovs_key_ipv6 *key = nl_attr_get(sa);
1153 const struct ovs_key_ipv6 *mask = masked ? key + 1 : NULL;
1154
1155 add_set_flow_action(ipv6_src, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC);
1156 add_set_flow_action(ipv6_dst, RTE_FLOW_ACTION_TYPE_SET_IPV6_DST);
1157 add_set_flow_action(ipv6_hlimit, RTE_FLOW_ACTION_TYPE_SET_TTL);
1158
1159 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1160 VLOG_DBG_RL(&rl, "Unsupported IPv6 set action");
1161 return -1;
1162 }
1163 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) {
1164 const struct ovs_key_tcp *key = nl_attr_get(sa);
1165 const struct ovs_key_tcp *mask = masked ? key + 1 : NULL;
1166
1167 add_set_flow_action(tcp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
1168 add_set_flow_action(tcp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
1169
1170 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1171 VLOG_DBG_RL(&rl, "Unsupported TCP set action");
1172 return -1;
1173 }
1174 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_UDP) {
1175 const struct ovs_key_udp *key = nl_attr_get(sa);
1176 const struct ovs_key_udp *mask = masked ? key + 1 : NULL;
1177
1178 add_set_flow_action(udp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
1179 add_set_flow_action(udp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
1180
1181 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1182 VLOG_DBG_RL(&rl, "Unsupported UDP set action");
1183 return -1;
1184 }
1185 } else {
1186 VLOG_DBG_RL(&rl,
1187 "Unsupported set action type %d", nl_attr_type(sa));
1188 return -1;
1189 }
1190 }
1191
1192 return 0;
1193 }
1194
1195 /* Maximum number of items in struct rte_flow_action_vxlan_encap.
1196 * ETH / IPv4(6) / UDP / VXLAN / END
1197 */
1198 #define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
1199
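/* Builds a VXLAN_ENCAP action from the pre-built tunnel 'header'
 * (ETH / IPv4 or IPv6 / UDP / VXLAN). Returns -1 if the header cannot be
 * parsed as such. */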
1200 static int
1201 add_vxlan_encap_action(struct flow_actions *actions,
1202 const void *header)
1203 {
1204 const struct eth_header *eth;
1205 const struct udp_header *udp;
1206 struct vxlan_data {
1207 struct rte_flow_action_vxlan_encap conf;
1208 struct rte_flow_item items[ACTION_VXLAN_ENCAP_ITEMS_NUM];
1209 } *vxlan_data;
1210 BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
1211 const void *vxlan;
1212 const void *l3;
1213 const void *l4;
1214 int field;
1215
1216 vxlan_data = xzalloc(sizeof *vxlan_data);
1217 field = 0;
1218
1219 eth = header;
1220 /* Ethernet */
1221 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
1222 vxlan_data->items[field].spec = eth;
1223 vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
1224 field++;
1225
1226 l3 = eth + 1;
1227 /* IP */
1228 if (eth->eth_type == htons(ETH_TYPE_IP)) {
1229 /* IPv4 */
1230 const struct ip_header *ip = l3;
1231
1232 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
1233 vxlan_data->items[field].spec = ip;
1234 vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;
1235
1236 if (ip->ip_proto != IPPROTO_UDP) {
1237 goto err;
1238 }
1239 l4 = (ip + 1);
1240 } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
1241 const struct ovs_16aligned_ip6_hdr *ip6 = l3;
1242
1243 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
1244 vxlan_data->items[field].spec = ip6;
1245 vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;
1246
1247 if (ip6->ip6_nxt != IPPROTO_UDP) {
1248 goto err;
1249 }
1250 l4 = (ip6 + 1);
1251 } else {
1252 goto err;
1253 }
1254 field++;
1255
1256 udp = l4;
1257 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
1258 vxlan_data->items[field].spec = udp;
1259 vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
1260 field++;
1261
1262 vxlan = (udp + 1);
1263 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
1264 vxlan_data->items[field].spec = vxlan;
1265 vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
1266 field++;
1267
1268 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;
1269
1270 vxlan_data->conf.definition = vxlan_data->items;
1271
1272 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);
1273
1274 return 0;
1275 err:
1276 free(vxlan_data);
1277 return -1;
1278 }
1279
1280 static int
1281 parse_vlan_push_action(struct flow_actions *actions,
1282 const struct ovs_action_push_vlan *vlan_push)
1283 {
1284 struct rte_flow_action_of_push_vlan *rte_push_vlan;
1285 struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp;
1286 struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid;
1287
1288 rte_push_vlan = xzalloc(sizeof *rte_push_vlan);
1289 rte_push_vlan->ethertype = vlan_push->vlan_tpid;
1290 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, rte_push_vlan);
1291
1292 rte_vlan_pcp = xzalloc(sizeof *rte_vlan_pcp);
1293 rte_vlan_pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci);
1294 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
1295 rte_vlan_pcp);
1296
1297 rte_vlan_vid = xzalloc(sizeof *rte_vlan_vid);
1298 rte_vlan_vid->vlan_vid = htons(vlan_tci_to_vid(vlan_push->vlan_tci));
1299 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
1300 rte_vlan_vid);
1301 return 0;
1302 }
1303
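/* Translates the actions nested inside an OVS clone() action (tunnel push and
 * output) into rte_flow actions. */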
1304 static int
1305 parse_clone_actions(struct netdev *netdev,
1306 struct flow_actions *actions,
1307 const struct nlattr *clone_actions,
1308 const size_t clone_actions_len)
1309 {
1310 const struct nlattr *ca;
1311 unsigned int cleft;
1312
1313 NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
1314 int clone_type = nl_attr_type(ca);
1315
1316 if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
1317 const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
1318 struct rte_flow_action_raw_encap *raw_encap;
1319
1320 if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
1321 !add_vxlan_encap_action(actions, tnl_push->header)) {
1322 continue;
1323 }
1324
1325 raw_encap = xzalloc(sizeof *raw_encap);
1326 raw_encap->data = (uint8_t *) tnl_push->header;
1327 raw_encap->preserve = NULL;
1328 raw_encap->size = tnl_push->header_len;
1329
1330 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
1331 raw_encap);
1332 } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
1333 if (add_output_action(netdev, actions, ca)) {
1334 return -1;
1335 }
1336 } else {
1337 VLOG_DBG_RL(&rl,
1338 "Unsupported nested action inside clone(), "
1339 "action type: %d", clone_type);
1340 return -1;
1341 }
1342 }
1343 return 0;
1344 }
1345
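/* Translates the OVS netlink actions into a list of rte_flow actions,
 * prefixed with a COUNT action and terminated by an END action. Returns -1
 * if any action is not supported. */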
1346 static int
1347 parse_flow_actions(struct netdev *netdev,
1348 struct flow_actions *actions,
1349 struct nlattr *nl_actions,
1350 size_t nl_actions_len)
1351 {
1352 struct nlattr *nla;
1353 size_t left;
1354
1355 add_count_action(actions);
1356 NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
1357 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
1358 if (add_output_action(netdev, actions, nla)) {
1359 return -1;
1360 }
1361 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
1362 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_DROP, NULL);
1363 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET ||
1364 nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
1365 const struct nlattr *set_actions = nl_attr_get(nla);
1366 const size_t set_actions_len = nl_attr_get_size(nla);
1367 bool masked = nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED;
1368
1369 if (parse_set_actions(actions, set_actions, set_actions_len,
1370 masked)) {
1371 return -1;
1372 }
1373 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) {
1374 const struct ovs_action_push_vlan *vlan = nl_attr_get(nla);
1375
1376 if (parse_vlan_push_action(actions, vlan)) {
1377 return -1;
1378 }
1379 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) {
1380 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL);
1381 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE &&
1382 left <= NLA_ALIGN(nla->nla_len)) {
1383 const struct nlattr *clone_actions = nl_attr_get(nla);
1384 size_t clone_actions_len = nl_attr_get_size(nla);
1385
1386 if (parse_clone_actions(netdev, actions, clone_actions,
1387 clone_actions_len)) {
1388 return -1;
1389 }
1390 } else {
1391 VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
1392 return -1;
1393 }
1394 }
1395
1396 if (nl_actions_len == 0) {
1397 VLOG_DBG_RL(&rl, "No actions provided");
1398 return -1;
1399 }
1400
1401 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
1402 return 0;
1403 }
1404
1405 static struct rte_flow *
1406 netdev_offload_dpdk_actions(struct netdev *netdev,
1407 struct flow_patterns *patterns,
1408 struct nlattr *nl_actions,
1409 size_t actions_len)
1410 {
1411 const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
1412 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
1413 struct rte_flow *flow = NULL;
1414 struct rte_flow_error error;
1415 int ret;
1416
1417 ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
1418 if (ret) {
1419 goto out;
1420 }
1421 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
1422 actions.actions, &error);
1423 out:
1424 free_flow_actions(&actions);
1425 return flow;
1426 }
1427
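/* Offloads a flow to 'netdev'. Full offload of the actions is attempted
 * first; on failure a MARK+RSS (partial offload) rule is installed instead.
 * On success the created rte_flow is associated with 'ufid'. */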
1428 static struct ufid_to_rte_flow_data *
1429 netdev_offload_dpdk_add_flow(struct netdev *netdev,
1430 struct match *match,
1431 struct nlattr *nl_actions,
1432 size_t actions_len,
1433 const ovs_u128 *ufid,
1434 struct offload_info *info)
1435 {
1436 struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
1437 struct ufid_to_rte_flow_data *flows_data = NULL;
1438 bool actions_offloaded = true;
1439 struct rte_flow *flow;
1440
1441 if (parse_flow_match(&patterns, match)) {
1442 VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported",
1443 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid));
1444 goto out;
1445 }
1446
1447 flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
1448 actions_len);
1449 if (!flow) {
1450         /* If we failed to offload the rule's actions, fall back to
1451          * MARK+RSS actions.
1452 */
1453 flow = netdev_offload_dpdk_mark_rss(&patterns, netdev,
1454 info->flow_mark);
1455 actions_offloaded = false;
1456 }
1457
1458 if (!flow) {
1459 goto out;
1460 }
1461 flows_data = ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
1462 VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT,
1463 netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid));
1464
1465 out:
1466 free_flow_patterns(&patterns);
1467 return flows_data;
1468 }
1469
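/* Destroys 'rte_flow' on 'netdev' and, on success, removes the ufid mapping. */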
1470 static int
1471 netdev_offload_dpdk_destroy_flow(struct netdev *netdev,
1472 const ovs_u128 *ufid,
1473 struct rte_flow *rte_flow)
1474 {
1475 struct rte_flow_error error;
1476 int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
1477
1478 if (ret == 0) {
1479 ufid_to_rte_flow_disassociate(ufid);
1480 VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR
1481 " flow destroy %d ufid " UUID_FMT,
1482 netdev_get_name(netdev), (intptr_t) rte_flow,
1483 netdev_dpdk_get_port_id(netdev),
1484 UUID_ARGS((struct uuid *) ufid));
1485 } else {
1486 VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT,
1487 netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev),
1488 UUID_ARGS((struct uuid *) ufid));
1489 }
1490
1491 return ret;
1492 }
1493
1494 static int
1495 netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
1496 struct nlattr *actions, size_t actions_len,
1497 const ovs_u128 *ufid, struct offload_info *info,
1498 struct dpif_flow_stats *stats)
1499 {
1500 struct ufid_to_rte_flow_data *rte_flow_data;
1501 struct dpif_flow_stats old_stats;
1502 bool modification = false;
1503 int ret;
1504
1505 /*
1506      * If an old rte_flow exists, this is a flow modification.
1507      * Destroy the old rte_flow first, then add the new one.
1508 * Keep the stats for the newly created rule.
1509 */
1510 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1511 if (rte_flow_data && rte_flow_data->rte_flow) {
1512 old_stats = rte_flow_data->stats;
1513 modification = true;
1514 ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
1515 rte_flow_data->rte_flow);
1516 if (ret < 0) {
1517 return ret;
1518 }
1519 }
1520
1521 rte_flow_data = netdev_offload_dpdk_add_flow(netdev, match, actions,
1522 actions_len, ufid, info);
1523 if (!rte_flow_data) {
1524 return -1;
1525 }
1526 if (modification) {
1527 rte_flow_data->stats = old_stats;
1528 }
1529 if (stats) {
1530 *stats = rte_flow_data->stats;
1531 }
1532 return 0;
1533 }
1534
1535 static int
1536 netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
1537 struct dpif_flow_stats *stats)
1538 {
1539 struct ufid_to_rte_flow_data *rte_flow_data;
1540
1541 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1542 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1543 return -1;
1544 }
1545
1546 if (stats) {
1547 memset(stats, 0, sizeof *stats);
1548 }
1549 return netdev_offload_dpdk_destroy_flow(netdev, ufid,
1550 rte_flow_data->rte_flow);
1551 }
1552
1553 static int
1554 netdev_offload_dpdk_init_flow_api(struct netdev *netdev)
1555 {
1556 return netdev_dpdk_flow_api_supported(netdev) ? 0 : EOPNOTSUPP;
1557 }
1558
1559 static int
1560 netdev_offload_dpdk_flow_get(struct netdev *netdev,
1561 struct match *match OVS_UNUSED,
1562 struct nlattr **actions OVS_UNUSED,
1563 const ovs_u128 *ufid,
1564 struct dpif_flow_stats *stats,
1565 struct dpif_flow_attrs *attrs,
1566 struct ofpbuf *buf OVS_UNUSED)
1567 {
1568 struct rte_flow_query_count query = { .reset = 1 };
1569 struct ufid_to_rte_flow_data *rte_flow_data;
1570 struct rte_flow_error error;
1571 int ret = 0;
1572
1573 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1574 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1575 ret = -1;
1576 goto out;
1577 }
1578
1579 attrs->offloaded = true;
1580 if (!rte_flow_data->actions_offloaded) {
1581 attrs->dp_layer = "ovs";
1582 memset(stats, 0, sizeof *stats);
1583 goto out;
1584 }
1585 attrs->dp_layer = "dpdk";
1586 ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
1587 &query, &error);
1588 if (ret) {
1589 VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p",
1590 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid),
1591 rte_flow_data->rte_flow);
1592 goto out;
1593 }
1594 rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
1595 rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
1596 if (query.hits_set && query.hits) {
1597 rte_flow_data->stats.used = time_msec();
1598 }
1599 memcpy(stats, &rte_flow_data->stats, sizeof *stats);
1600 out:
1601 attrs->dp_extra_info = NULL;
1602 return ret;
1603 }
1604
1605 const struct netdev_flow_api netdev_offload_dpdk = {
1606 .type = "dpdk_flow_api",
1607 .flow_put = netdev_offload_dpdk_flow_put,
1608 .flow_del = netdev_offload_dpdk_flow_del,
1609 .init_flow_api = netdev_offload_dpdk_init_flow_api,
1610 .flow_get = netdev_offload_dpdk_flow_get,
1611 };