lib/netdev-offload-dpdk.c
netdev-offload-dpdk: Pass L4 proto-id to match in the L3 rte_flow_item.
1 /*
2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 * Copyright (c) 2019 Mellanox Technologies, Ltd.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 #include <config.h>
18
19 #include <sys/types.h>
20 #include <netinet/ip6.h>
21 #include <rte_flow.h>
22
23 #include "cmap.h"
24 #include "dpif-netdev.h"
25 #include "netdev-offload-provider.h"
26 #include "netdev-provider.h"
27 #include "openvswitch/match.h"
28 #include "openvswitch/vlog.h"
29 #include "packets.h"
30 #include "uuid.h"
31
32 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
33 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
34
35 /* Thread-safety
36 * =============
37 *
38 * The API below is NOT thread safe in the following terms:
39 *
40 * - The caller must be sure that none of these functions will be called
41 * simultaneously, even for different 'netdev's.
42 *
43 * - The caller must be sure that 'netdev' will not be destructed/deallocated.
44 *
45 * - The caller must be sure that 'netdev' configuration will not be changed.
46 * For example, simultaneous call of 'netdev_reconfigure()' for the same
47 * 'netdev' is forbidden.
48 *
49 * For the current implementation, all of the above restrictions can be
50 * fulfilled by taking the datapath 'port_mutex' in lib/dpif-netdev.c. */
51
52 /*
53 * A mapping from ufid to dpdk rte_flow.
54 */
55 static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;
56
57 struct ufid_to_rte_flow_data {
58 struct cmap_node node;
59 ovs_u128 ufid;
60 struct rte_flow *rte_flow;
61 bool actions_offloaded;
62 struct dpif_flow_stats stats;
63 };
64
65 /* Find rte_flow with @ufid. */
66 static struct ufid_to_rte_flow_data *
67 ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
68 {
69 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
70 struct ufid_to_rte_flow_data *data;
71
72 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
73 if (ovs_u128_equals(*ufid, data->ufid)) {
74 return data;
75 }
76 }
77
78 return NULL;
79 }
80
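/* Associate 'ufid' with 'rte_flow' in the ufid_to_rte_flow map and return the
 * newly inserted entry.  'actions_offloaded' records whether the flow carries
 * the real offloaded actions or only the MARK/RSS fallback. */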
81 static inline struct ufid_to_rte_flow_data *
82 ufid_to_rte_flow_associate(const ovs_u128 *ufid,
83 struct rte_flow *rte_flow, bool actions_offloaded)
84 {
85 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
86 struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
87 struct ufid_to_rte_flow_data *data_prev;
88
89 /*
90 * We should not simply overwrite an existing rte flow.
91 * We should have deleted it first before re-adding it.
92 * Thus, if the following assert triggers, something is wrong:
93 * the rte_flow is not destroyed.
94 */
95 data_prev = ufid_to_rte_flow_data_find(ufid);
96 if (data_prev) {
97 ovs_assert(data_prev->rte_flow == NULL);
98 }
99
100 data->ufid = *ufid;
101 data->rte_flow = rte_flow;
102 data->actions_offloaded = actions_offloaded;
103
104 cmap_insert(&ufid_to_rte_flow,
105 CONST_CAST(struct cmap_node *, &data->node), hash);
106 return data;
107 }
108
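/* Remove the mapping of 'ufid' from the ufid_to_rte_flow map and free it via
 * RCU.  Logs a warning if 'ufid' has no associated rte flow. */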
109 static inline void
110 ufid_to_rte_flow_disassociate(const ovs_u128 *ufid)
111 {
112 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
113 struct ufid_to_rte_flow_data *data;
114
115 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
116 if (ovs_u128_equals(*ufid, data->ufid)) {
117 cmap_remove(&ufid_to_rte_flow,
118 CONST_CAST(struct cmap_node *, &data->node), hash);
119 ovsrcu_postpone(free, data);
120 return;
121 }
122 }
123
124 VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow",
125 UUID_ARGS((struct uuid *) ufid));
126 }
127
128 /*
129 * To avoid individual xrealloc calls for each new element, a 'current_max'
130 * is used to keep track of the currently allocated number of elements.
131 * It starts at 8 and doubles on each xrealloc call.
132 */
133 struct flow_patterns {
134 struct rte_flow_item *items;
135 int cnt;
136 int current_max;
137 };
138
139 struct flow_actions {
140 struct rte_flow_action *actions;
141 int cnt;
142 int current_max;
143 };
144
145 static void
146 dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
147 {
148 ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
149 attr->ingress ? "ingress " : "",
150 attr->egress ? "egress " : "", attr->priority, attr->group,
151 attr->transfer ? "transfer " : "");
152 }
153
154 /* Adds one pattern item 'field' with its 'mask' to the dynamic string 's',
155 * using a 'testpmd command'-like format. */
156 #define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \
157 if (is_all_ones(&mask, sizeof mask)) { \
158 ds_put_format(s, field " is " fmt " ", spec_pri); \
159 } else if (!is_all_zeros(&mask, sizeof mask)) { \
160 ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \
161 spec_pri, mask_pri); \
162 }
163
164 static void
165 dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
166 {
167 if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
168 const struct rte_flow_item_eth *eth_spec = item->spec;
169 const struct rte_flow_item_eth *eth_mask = item->mask;
170
171 ds_put_cstr(s, "eth ");
172 if (eth_spec) {
173 if (!eth_mask) {
174 eth_mask = &rte_flow_item_eth_mask;
175 }
176 DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT,
177 ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
178 ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes));
179 DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT,
180 ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
181 ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes));
182 DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
183 ntohs(eth_spec->type),
184 ntohs(eth_mask->type));
185 }
186 ds_put_cstr(s, "/ ");
187 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
188 const struct rte_flow_item_vlan *vlan_spec = item->spec;
189 const struct rte_flow_item_vlan *vlan_mask = item->mask;
190
191 ds_put_cstr(s, "vlan ");
192 if (vlan_spec) {
193 if (!vlan_mask) {
194 vlan_mask = &rte_flow_item_vlan_mask;
195 }
196 DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type", "0x%"PRIx16,
197 ntohs(vlan_spec->inner_type),
198 ntohs(vlan_mask->inner_type));
199 DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16,
200 ntohs(vlan_spec->tci), ntohs(vlan_mask->tci));
201 }
202 ds_put_cstr(s, "/ ");
203 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
204 const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
205 const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
206
207 ds_put_cstr(s, "ipv4 ");
208 if (ipv4_spec) {
209 if (!ipv4_mask) {
210 ipv4_mask = &rte_flow_item_ipv4_mask;
211 }
212 DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT,
213 IP_ARGS(ipv4_spec->hdr.src_addr),
214 IP_ARGS(ipv4_mask->hdr.src_addr));
215 DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT,
216 IP_ARGS(ipv4_spec->hdr.dst_addr),
217 IP_ARGS(ipv4_mask->hdr.dst_addr));
218 DUMP_PATTERN_ITEM(ipv4_mask->hdr.next_proto_id, "proto",
219 "0x%"PRIx8, ipv4_spec->hdr.next_proto_id,
220 ipv4_mask->hdr.next_proto_id);
221 DUMP_PATTERN_ITEM(ipv4_mask->hdr.type_of_service, "tos",
222 "0x%"PRIx8, ipv4_spec->hdr.type_of_service,
223 ipv4_mask->hdr.type_of_service);
224 DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, "ttl",
225 "0x%"PRIx8, ipv4_spec->hdr.time_to_live,
226 ipv4_mask->hdr.time_to_live);
227 }
228 ds_put_cstr(s, "/ ");
229 } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
230 const struct rte_flow_item_udp *udp_spec = item->spec;
231 const struct rte_flow_item_udp *udp_mask = item->mask;
232
233 ds_put_cstr(s, "udp ");
234 if (udp_spec) {
235 if (!udp_mask) {
236 udp_mask = &rte_flow_item_udp_mask;
237 }
238 DUMP_PATTERN_ITEM(udp_mask->hdr.src_port, "src", "%"PRIu16,
239 ntohs(udp_spec->hdr.src_port),
240 ntohs(udp_mask->hdr.src_port));
241 DUMP_PATTERN_ITEM(udp_mask->hdr.dst_port, "dst", "%"PRIu16,
242 ntohs(udp_spec->hdr.dst_port),
243 ntohs(udp_mask->hdr.dst_port));
244 }
245 ds_put_cstr(s, "/ ");
246 } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) {
247 const struct rte_flow_item_sctp *sctp_spec = item->spec;
248 const struct rte_flow_item_sctp *sctp_mask = item->mask;
249
250 ds_put_cstr(s, "sctp ");
251 if (sctp_spec) {
252 if (!sctp_mask) {
253 sctp_mask = &rte_flow_item_sctp_mask;
254 }
255 DUMP_PATTERN_ITEM(sctp_mask->hdr.src_port, "src", "%"PRIu16,
256 ntohs(sctp_spec->hdr.src_port),
257 ntohs(sctp_mask->hdr.src_port));
258 DUMP_PATTERN_ITEM(sctp_mask->hdr.dst_port, "dst", "%"PRIu16,
259 ntohs(sctp_spec->hdr.dst_port),
260 ntohs(sctp_mask->hdr.dst_port));
261 }
262 ds_put_cstr(s, "/ ");
263 } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) {
264 const struct rte_flow_item_icmp *icmp_spec = item->spec;
265 const struct rte_flow_item_icmp *icmp_mask = item->mask;
266
267 ds_put_cstr(s, "icmp ");
268 if (icmp_spec) {
269 if (!icmp_mask) {
270 icmp_mask = &rte_flow_item_icmp_mask;
271 }
272 DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_type, "icmp_type", "%"PRIu8,
273 icmp_spec->hdr.icmp_type,
274 icmp_mask->hdr.icmp_type);
275 DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_code, "icmp_code", "%"PRIu8,
276 icmp_spec->hdr.icmp_code,
277 icmp_mask->hdr.icmp_code);
278 }
279 ds_put_cstr(s, "/ ");
280 } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
281 const struct rte_flow_item_tcp *tcp_spec = item->spec;
282 const struct rte_flow_item_tcp *tcp_mask = item->mask;
283
284 ds_put_cstr(s, "tcp ");
285 if (tcp_spec) {
286 if (!tcp_mask) {
287 tcp_mask = &rte_flow_item_tcp_mask;
288 }
289 DUMP_PATTERN_ITEM(tcp_mask->hdr.src_port, "src", "%"PRIu16,
290 ntohs(tcp_spec->hdr.src_port),
291 ntohs(tcp_mask->hdr.src_port));
292 DUMP_PATTERN_ITEM(tcp_mask->hdr.dst_port, "dst", "%"PRIu16,
293 ntohs(tcp_spec->hdr.dst_port),
294 ntohs(tcp_mask->hdr.dst_port));
295 DUMP_PATTERN_ITEM(tcp_mask->hdr.tcp_flags, "flags", "0x%"PRIx8,
296 tcp_spec->hdr.tcp_flags,
297 tcp_mask->hdr.tcp_flags);
298 }
299 ds_put_cstr(s, "/ ");
300 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6) {
301 const struct rte_flow_item_ipv6 *ipv6_spec = item->spec;
302 const struct rte_flow_item_ipv6 *ipv6_mask = item->mask;
303
304 char addr_str[INET6_ADDRSTRLEN];
305 char mask_str[INET6_ADDRSTRLEN];
306 struct in6_addr addr, mask;
307
308 ds_put_cstr(s, "ipv6 ");
309 if (ipv6_spec) {
310 if (!ipv6_mask) {
311 ipv6_mask = &rte_flow_item_ipv6_mask;
312 }
313 memcpy(&addr, ipv6_spec->hdr.src_addr, sizeof addr);
314 memcpy(&mask, ipv6_mask->hdr.src_addr, sizeof mask);
315 ipv6_string_mapped(addr_str, &addr);
316 ipv6_string_mapped(mask_str, &mask);
317 DUMP_PATTERN_ITEM(mask, "src", "%s", addr_str, mask_str);
318
319 memcpy(&addr, ipv6_spec->hdr.dst_addr, sizeof addr);
320 memcpy(&mask, ipv6_mask->hdr.dst_addr, sizeof mask);
321 ipv6_string_mapped(addr_str, &addr);
322 ipv6_string_mapped(mask_str, &mask);
323 DUMP_PATTERN_ITEM(mask, "dst", "%s", addr_str, mask_str);
324
325 DUMP_PATTERN_ITEM(ipv6_mask->hdr.proto, "proto", "%"PRIu8,
326 ipv6_spec->hdr.proto, ipv6_mask->hdr.proto);
327 DUMP_PATTERN_ITEM(ipv6_mask->hdr.vtc_flow, "tc", "0x%"PRIx32,
328 ntohl(ipv6_spec->hdr.vtc_flow),
329 ntohl(ipv6_mask->hdr.vtc_flow));
330 DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, "hop", "%"PRIu8,
331 ipv6_spec->hdr.hop_limits,
332 ipv6_mask->hdr.hop_limits);
333 }
334 ds_put_cstr(s, "/ ");
335 } else {
336 ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
337 }
338 }
339
340 static void
341 dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
342 {
343 const struct rte_flow_item_eth *eth = NULL;
344 const struct rte_flow_item_ipv4 *ipv4 = NULL;
345 const struct rte_flow_item_ipv6 *ipv6 = NULL;
346 const struct rte_flow_item_udp *udp = NULL;
347 const struct rte_flow_item_vxlan *vxlan = NULL;
348
349 for (; items && items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
350 if (items->type == RTE_FLOW_ITEM_TYPE_ETH) {
351 eth = items->spec;
352 } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV4) {
353 ipv4 = items->spec;
354 } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV6) {
355 ipv6 = items->spec;
356 } else if (items->type == RTE_FLOW_ITEM_TYPE_UDP) {
357 udp = items->spec;
358 } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
359 vxlan = items->spec;
360 }
361 }
362
363 ds_put_format(s, "set vxlan ip-version %s ",
364 ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
365 if (vxlan) {
366 ds_put_format(s, "vni %"PRIu32" ",
367 ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
368 }
369 if (udp) {
370 ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
371 ntohs(udp->hdr.src_port), ntohs(udp->hdr.dst_port));
372 }
373 if (ipv4) {
374 ds_put_format(s, "ip-src "IP_FMT" ip-dst "IP_FMT" ",
375 IP_ARGS(ipv4->hdr.src_addr),
376 IP_ARGS(ipv4->hdr.dst_addr));
377 }
378 if (ipv6) {
379 struct in6_addr addr;
380
381 ds_put_cstr(s, "ip-src ");
382 memcpy(&addr, ipv6->hdr.src_addr, sizeof addr);
383 ipv6_format_mapped(&addr, s);
384 ds_put_cstr(s, " ip-dst ");
385 memcpy(&addr, ipv6->hdr.dst_addr, sizeof addr);
386 ipv6_format_mapped(&addr, s);
387 ds_put_cstr(s, " ");
388 }
389 if (eth) {
390 ds_put_format(s, "eth-src "ETH_ADDR_FMT" eth-dst "ETH_ADDR_FMT,
391 ETH_ADDR_BYTES_ARGS(eth->src.addr_bytes),
392 ETH_ADDR_BYTES_ARGS(eth->dst.addr_bytes));
393 }
394 }
395
396 static void
397 dump_flow_action(struct ds *s, struct ds *s_extra,
398 const struct rte_flow_action *actions)
399 {
400 if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
401 const struct rte_flow_action_mark *mark = actions->conf;
402
403 ds_put_cstr(s, "mark ");
404 if (mark) {
405 ds_put_format(s, "id %d ", mark->id);
406 }
407 ds_put_cstr(s, "/ ");
408 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
409 ds_put_cstr(s, "rss / ");
410 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
411 ds_put_cstr(s, "count / ");
412 } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
413 const struct rte_flow_action_port_id *port_id = actions->conf;
414
415 ds_put_cstr(s, "port_id ");
416 if (port_id) {
417 ds_put_format(s, "original %d id %d ",
418 port_id->original, port_id->id);
419 }
420 ds_put_cstr(s, "/ ");
421 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
422 ds_put_cstr(s, "drop / ");
423 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ||
424 actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) {
425 const struct rte_flow_action_set_mac *set_mac = actions->conf;
426
427 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST
428 ? "dst" : "src";
429
430 ds_put_format(s, "set_mac_%s ", dirstr);
431 if (set_mac) {
432 ds_put_format(s, "mac_addr "ETH_ADDR_FMT" ",
433 ETH_ADDR_BYTES_ARGS(set_mac->mac_addr));
434 }
435 ds_put_cstr(s, "/ ");
436 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ||
437 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) {
438 const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf;
439 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
440 ? "dst" : "src";
441
442 ds_put_format(s, "set_ipv4_%s ", dirstr);
443 if (set_ipv4) {
444 ds_put_format(s, "ipv4_addr "IP_FMT" ",
445 IP_ARGS(set_ipv4->ipv4_addr));
446 }
447 ds_put_cstr(s, "/ ");
448 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) {
449 const struct rte_flow_action_set_ttl *set_ttl = actions->conf;
450
451 ds_put_cstr(s, "set_ttl ");
452 if (set_ttl) {
453 ds_put_format(s, "ttl_value %d ", set_ttl->ttl_value);
454 }
455 ds_put_cstr(s, "/ ");
456 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ||
457 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) {
458 const struct rte_flow_action_set_tp *set_tp = actions->conf;
459 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST
460 ? "dst" : "src";
461
462 ds_put_format(s, "set_tp_%s ", dirstr);
463 if (set_tp) {
464 ds_put_format(s, "port %"PRIu16" ", ntohs(set_tp->port));
465 }
466 ds_put_cstr(s, "/ ");
467 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) {
468 const struct rte_flow_action_of_push_vlan *of_push_vlan =
469 actions->conf;
470
471 ds_put_cstr(s, "of_push_vlan ");
472 if (of_push_vlan) {
473 ds_put_format(s, "ethertype 0x%"PRIx16" ",
474 ntohs(of_push_vlan->ethertype));
475 }
476 ds_put_cstr(s, "/ ");
477 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
478 const struct rte_flow_action_of_set_vlan_pcp *of_set_vlan_pcp =
479 actions->conf;
480
481 ds_put_cstr(s, "of_set_vlan_pcp ");
482 if (of_set_vlan_pcp) {
483 ds_put_format(s, "vlan_pcp %"PRIu8" ", of_set_vlan_pcp->vlan_pcp);
484 }
485 ds_put_cstr(s, "/ ");
486 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
487 const struct rte_flow_action_of_set_vlan_vid *of_set_vlan_vid =
488 actions->conf;
489
490 ds_put_cstr(s, "of_set_vlan_vid ");
491 if (of_set_vlan_vid) {
492 ds_put_format(s, "vlan_vid %"PRIu16" ",
493 ntohs(of_set_vlan_vid->vlan_vid));
494 }
495 ds_put_cstr(s, "/ ");
496 } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) {
497 ds_put_cstr(s, "of_pop_vlan / ");
498 } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ||
499 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) {
500 const struct rte_flow_action_set_ipv6 *set_ipv6 = actions->conf;
501
502 char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
503 ? "dst" : "src";
504
505 ds_put_format(s, "set_ipv6_%s ", dirstr);
506 if (set_ipv6) {
507 ds_put_cstr(s, "ipv6_addr ");
508 ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s);
509 ds_put_cstr(s, " ");
510 }
511 ds_put_cstr(s, "/ ");
512 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
513 const struct rte_flow_action_raw_encap *raw_encap = actions->conf;
514
515 ds_put_cstr(s, "raw_encap index 0 / ");
516 if (raw_encap) {
517 ds_put_format(s_extra, "Raw-encap size=%"PRIuSIZE" set raw_encap 0 "
518 "raw pattern is ", raw_encap->size);
519 for (size_t i = 0; i < raw_encap->size; i++) {
520 ds_put_format(s_extra, "%02x", raw_encap->data[i]);
521 }
522 ds_put_cstr(s_extra, " / end_set;");
523 }
524 } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
525 const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
526 const struct rte_flow_item *items = vxlan_encap->definition;
527
528 ds_put_cstr(s, "vxlan_encap / ");
529 dump_vxlan_encap(s_extra, items);
530 ds_put_cstr(s_extra, ";");
531 } else {
532 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
533 }
534 }
535
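/* Dump the whole flow (attributes, pattern items and actions) into 's' in a
 * 'testpmd command'-like format.  Details that cannot be expressed inline,
 * such as raw_encap and vxlan_encap contents, are appended to 's_extra'. */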
536 static struct ds *
537 dump_flow(struct ds *s, struct ds *s_extra,
538 const struct rte_flow_attr *attr,
539 const struct rte_flow_item *items,
540 const struct rte_flow_action *actions)
541 {
542 if (attr) {
543 dump_flow_attr(s, attr);
544 }
545 ds_put_cstr(s, "pattern ");
546 while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
547 dump_flow_pattern(s, items++);
548 }
549 ds_put_cstr(s, "end actions ");
550 while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
551 dump_flow_action(s, s_extra, actions++);
552 }
553 ds_put_cstr(s, "end");
554 return s;
555 }
556
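/* Create the rte_flow and log the outcome.  A successful creation is logged
 * at DBG level together with a dump of the flow; a failure is logged at WARN
 * level, or at DBG level if the failure is only an unsupported action. */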
557 static struct rte_flow *
558 netdev_offload_dpdk_flow_create(struct netdev *netdev,
559 const struct rte_flow_attr *attr,
560 const struct rte_flow_item *items,
561 const struct rte_flow_action *actions,
562 struct rte_flow_error *error)
563 {
564 struct ds s_extra = DS_EMPTY_INITIALIZER;
565 struct ds s = DS_EMPTY_INITIALIZER;
566 struct rte_flow *flow;
567 char *extra_str;
568
569 flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
570 if (flow) {
571 if (!VLOG_DROP_DBG(&rl)) {
572 dump_flow(&s, &s_extra, attr, items, actions);
573 extra_str = ds_cstr(&s_extra);
574 VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
575 netdev_get_name(netdev), (intptr_t) flow, extra_str,
576 netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
577 }
578 } else {
579 enum vlog_level level = VLL_WARN;
580
581 if (error->type == RTE_FLOW_ERROR_TYPE_ACTION) {
582 level = VLL_DBG;
583 }
584 VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
585 netdev_get_name(netdev), error->type, error->message);
586 if (!vlog_should_drop(&this_module, level, &rl)) {
587 dump_flow(&s, &s_extra, attr, items, actions);
588 extra_str = ds_cstr(&s_extra);
589 VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
590 netdev_get_name(netdev), extra_str,
591 netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
592 }
593 }
594 ds_destroy(&s);
595 ds_destroy(&s_extra);
596 return flow;
597 }
598
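/* Append one pattern item of 'type' with 'spec' and 'mask' to 'patterns',
 * growing the items array as described above (8 items initially, doubling
 * on each reallocation). */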
599 static void
600 add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
601 const void *spec, const void *mask)
602 {
603 int cnt = patterns->cnt;
604
605 if (cnt == 0) {
606 patterns->current_max = 8;
607 patterns->items = xcalloc(patterns->current_max,
608 sizeof *patterns->items);
609 } else if (cnt == patterns->current_max) {
610 patterns->current_max *= 2;
611 patterns->items = xrealloc(patterns->items, patterns->current_max *
612 sizeof *patterns->items);
613 }
614
615 patterns->items[cnt].type = type;
616 patterns->items[cnt].spec = spec;
617 patterns->items[cnt].mask = mask;
618 patterns->items[cnt].last = NULL;
619 patterns->cnt++;
620 }
621
622 static void
623 add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
624 const void *conf)
625 {
626 int cnt = actions->cnt;
627
628 if (cnt == 0) {
629 actions->current_max = 8;
630 actions->actions = xcalloc(actions->current_max,
631 sizeof *actions->actions);
632 } else if (cnt == actions->current_max) {
633 actions->current_max *= 2;
634 actions->actions = xrealloc(actions->actions, actions->current_max *
635 sizeof *actions->actions);
636 }
637
638 actions->actions[cnt].type = type;
639 actions->actions[cnt].conf = conf;
640 actions->cnt++;
641 }
642
643 static void
644 free_flow_patterns(struct flow_patterns *patterns)
645 {
646 int i;
647
648 for (i = 0; i < patterns->cnt; i++) {
649 if (patterns->items[i].spec) {
650 free(CONST_CAST(void *, patterns->items[i].spec));
651 }
652 if (patterns->items[i].mask) {
653 free(CONST_CAST(void *, patterns->items[i].mask));
654 }
655 }
656 free(patterns->items);
657 patterns->items = NULL;
658 patterns->cnt = 0;
659 }
660
661 static void
662 free_flow_actions(struct flow_actions *actions)
663 {
664 int i;
665
666 for (i = 0; i < actions->cnt; i++) {
667 if (actions->actions[i].conf) {
668 free(CONST_CAST(void *, actions->actions[i].conf));
669 }
670 }
671 free(actions->actions);
672 actions->actions = NULL;
673 actions->cnt = 0;
674 }
675
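/* Translate 'match' into a list of rte_flow pattern items.  Every field that
 * is translated is cleared from the consumed masks; if any mask bits remain
 * set at the end, or the match uses an unsupported field (non-zero recirc_id,
 * IP fragments, an unsupported L4 protocol), -1 is returned. */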
676 static int
677 parse_flow_match(struct flow_patterns *patterns,
678 struct match *match)
679 {
680 struct flow *consumed_masks;
681 uint8_t proto = 0;
682
683 consumed_masks = &match->wc.masks;
684
685 if (!flow_tnl_dst_is_set(&match->flow.tunnel)) {
686 memset(&consumed_masks->tunnel, 0, sizeof consumed_masks->tunnel);
687 }
688
689 memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port);
690 /* recirc id must be zero. */
691 if (match->wc.masks.recirc_id & match->flow.recirc_id) {
692 return -1;
693 }
694 consumed_masks->recirc_id = 0;
695 consumed_masks->packet_type = 0;
696
697 /* Eth */
698 if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow)
699 && eth_addr_is_zero(match->wc.masks.dl_dst)
700 && eth_addr_is_zero(match->wc.masks.dl_src)) {
701 /*
702 * This is a temporary workaround to fix the ethernet pattern for partial
703 * hardware offload on X710 devices. This fix will be reverted once
704 * the issue is fixed within the i40e PMD driver.
705 */
706 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
707
708 memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
709 memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
710 consumed_masks->dl_type = 0;
711 } else if (match->wc.masks.dl_type ||
712 !eth_addr_is_zero(match->wc.masks.dl_src) ||
713 !eth_addr_is_zero(match->wc.masks.dl_dst)) {
714 struct rte_flow_item_eth *spec, *mask;
715
716 spec = xzalloc(sizeof *spec);
717 mask = xzalloc(sizeof *mask);
718
719 memcpy(&spec->dst, &match->flow.dl_dst, sizeof spec->dst);
720 memcpy(&spec->src, &match->flow.dl_src, sizeof spec->src);
721 spec->type = match->flow.dl_type;
722
723 memcpy(&mask->dst, &match->wc.masks.dl_dst, sizeof mask->dst);
724 memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src);
725 mask->type = match->wc.masks.dl_type;
726
727 memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
728 memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
729 consumed_masks->dl_type = 0;
730
731 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
732 }
733
734 /* VLAN */
735 if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
736 struct rte_flow_item_vlan *spec, *mask;
737
738 spec = xzalloc(sizeof *spec);
739 mask = xzalloc(sizeof *mask);
740
741 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
742 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
743
744 /* Match any protocol. */
745 mask->inner_type = 0;
746
747 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
748 }
749 /* For untagged matching, match->wc.masks.vlans[0].tci is 0xFFFF and
750 * match->flow.vlans[0].tci is 0. The mask must be consumed outside of the
751 * if scope to handle that case.
752 */
753 memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]);
754
755 /* IP v4 */
756 if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
757 struct rte_flow_item_ipv4 *spec, *mask;
758
759 spec = xzalloc(sizeof *spec);
760 mask = xzalloc(sizeof *mask);
761
762 spec->hdr.type_of_service = match->flow.nw_tos;
763 spec->hdr.time_to_live = match->flow.nw_ttl;
764 spec->hdr.next_proto_id = match->flow.nw_proto;
765 spec->hdr.src_addr = match->flow.nw_src;
766 spec->hdr.dst_addr = match->flow.nw_dst;
767
768 mask->hdr.type_of_service = match->wc.masks.nw_tos;
769 mask->hdr.time_to_live = match->wc.masks.nw_ttl;
770 mask->hdr.next_proto_id = match->wc.masks.nw_proto;
771 mask->hdr.src_addr = match->wc.masks.nw_src;
772 mask->hdr.dst_addr = match->wc.masks.nw_dst;
773
774 consumed_masks->nw_tos = 0;
775 consumed_masks->nw_ttl = 0;
776 consumed_masks->nw_proto = 0;
777 consumed_masks->nw_src = 0;
778 consumed_masks->nw_dst = 0;
779
780 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask);
781
782 /* Save proto for L4 protocol setup. */
783 proto = spec->hdr.next_proto_id &
784 mask->hdr.next_proto_id;
785 }
786 /* If fragmented, then don't HW accelerate - for now. */
787 if (match->wc.masks.nw_frag & match->flow.nw_frag) {
788 return -1;
789 }
790 consumed_masks->nw_frag = 0;
791
792 /* IP v6 */
793 if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {
794 struct rte_flow_item_ipv6 *spec, *mask;
795
796 spec = xzalloc(sizeof *spec);
797 mask = xzalloc(sizeof *mask);
798
799 spec->hdr.proto = match->flow.nw_proto;
800 spec->hdr.hop_limits = match->flow.nw_ttl;
801 spec->hdr.vtc_flow =
802 htonl((uint32_t) match->flow.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
803 memcpy(spec->hdr.src_addr, &match->flow.ipv6_src,
804 sizeof spec->hdr.src_addr);
805 memcpy(spec->hdr.dst_addr, &match->flow.ipv6_dst,
806 sizeof spec->hdr.dst_addr);
807
808 mask->hdr.proto = match->wc.masks.nw_proto;
809 mask->hdr.hop_limits = match->wc.masks.nw_ttl;
810 mask->hdr.vtc_flow =
811 htonl((uint32_t) match->wc.masks.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
812 memcpy(mask->hdr.src_addr, &match->wc.masks.ipv6_src,
813 sizeof mask->hdr.src_addr);
814 memcpy(mask->hdr.dst_addr, &match->wc.masks.ipv6_dst,
815 sizeof mask->hdr.dst_addr);
816
817 consumed_masks->nw_proto = 0;
818 consumed_masks->nw_ttl = 0;
819 consumed_masks->nw_tos = 0;
820 memset(&consumed_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src);
821 memset(&consumed_masks->ipv6_dst, 0, sizeof consumed_masks->ipv6_dst);
822
823 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV6, spec, mask);
824
825 /* Save proto for L4 protocol setup. */
826 proto = spec->hdr.proto & mask->hdr.proto;
827 }
828
829 if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP &&
830 proto != IPPROTO_SCTP && proto != IPPROTO_TCP &&
831 (match->wc.masks.tp_src ||
832 match->wc.masks.tp_dst ||
833 match->wc.masks.tcp_flags)) {
834 VLOG_DBG("L4 Protocol (%u) not supported", proto);
835 return -1;
836 }
837
838 if (proto == IPPROTO_TCP) {
839 struct rte_flow_item_tcp *spec, *mask;
840
841 spec = xzalloc(sizeof *spec);
842 mask = xzalloc(sizeof *mask);
843
844 spec->hdr.src_port = match->flow.tp_src;
845 spec->hdr.dst_port = match->flow.tp_dst;
846 spec->hdr.data_off = ntohs(match->flow.tcp_flags) >> 8;
847 spec->hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff;
848
849 mask->hdr.src_port = match->wc.masks.tp_src;
850 mask->hdr.dst_port = match->wc.masks.tp_dst;
851 mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8;
852 mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;
853
854 consumed_masks->tp_src = 0;
855 consumed_masks->tp_dst = 0;
856 consumed_masks->tcp_flags = 0;
857
858 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask);
859 } else if (proto == IPPROTO_UDP) {
860 struct rte_flow_item_udp *spec, *mask;
861
862 spec = xzalloc(sizeof *spec);
863 mask = xzalloc(sizeof *mask);
864
865 spec->hdr.src_port = match->flow.tp_src;
866 spec->hdr.dst_port = match->flow.tp_dst;
867
868 mask->hdr.src_port = match->wc.masks.tp_src;
869 mask->hdr.dst_port = match->wc.masks.tp_dst;
870
871 consumed_masks->tp_src = 0;
872 consumed_masks->tp_dst = 0;
873
874 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask);
875 } else if (proto == IPPROTO_SCTP) {
876 struct rte_flow_item_sctp *spec, *mask;
877
878 spec = xzalloc(sizeof *spec);
879 mask = xzalloc(sizeof *mask);
880
881 spec->hdr.src_port = match->flow.tp_src;
882 spec->hdr.dst_port = match->flow.tp_dst;
883
884 mask->hdr.src_port = match->wc.masks.tp_src;
885 mask->hdr.dst_port = match->wc.masks.tp_dst;
886
887 consumed_masks->tp_src = 0;
888 consumed_masks->tp_dst = 0;
889
890 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask);
891 } else if (proto == IPPROTO_ICMP) {
892 struct rte_flow_item_icmp *spec, *mask;
893
894 spec = xzalloc(sizeof *spec);
895 mask = xzalloc(sizeof *mask);
896
897 spec->hdr.icmp_type = (uint8_t) ntohs(match->flow.tp_src);
898 spec->hdr.icmp_code = (uint8_t) ntohs(match->flow.tp_dst);
899
900 mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src);
901 mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst);
902
903 consumed_masks->tp_src = 0;
904 consumed_masks->tp_dst = 0;
905
906 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask);
907 }
908
909 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL);
910
911 if (!is_all_zeros(consumed_masks, sizeof *consumed_masks)) {
912 return -1;
913 }
914 return 0;
915 }
916
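/* Build the action list used for partial offload: a MARK action carrying
 * 'flow_mark', followed by an RSS action that spreads packets over all of
 * the netdev's Rx queues, then END. */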
917 static void
918 add_flow_mark_rss_actions(struct flow_actions *actions,
919 uint32_t flow_mark,
920 const struct netdev *netdev)
921 {
922 struct rte_flow_action_mark *mark;
923 struct action_rss_data {
924 struct rte_flow_action_rss conf;
925 uint16_t queue[0];
926 } *rss_data;
927 BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0);
928 int i;
929
930 mark = xzalloc(sizeof *mark);
931
932 mark->id = flow_mark;
933 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark);
934
935 rss_data = xmalloc(sizeof *rss_data +
936 netdev_n_rxq(netdev) * sizeof rss_data->queue[0]);
937 *rss_data = (struct action_rss_data) {
938 .conf = (struct rte_flow_action_rss) {
939 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
940 .level = 0,
941 .types = 0,
942 .queue_num = netdev_n_rxq(netdev),
943 .queue = rss_data->queue,
944 .key_len = 0,
945 .key = NULL
946 },
947 };
948
949 /* Override queue array with default. */
950 for (i = 0; i < netdev_n_rxq(netdev); i++) {
951 rss_data->queue[i] = i;
952 }
953
954 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
955 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
956 }
957
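/* Create a partial-offload flow for 'patterns': matching packets are tagged
 * with 'flow_mark' and distributed over the Rx queues via RSS, leaving the
 * actions to the software datapath. */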
958 static struct rte_flow *
959 netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
960 struct netdev *netdev,
961 uint32_t flow_mark)
962 {
963 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
964 const struct rte_flow_attr flow_attr = {
965 .group = 0,
966 .priority = 0,
967 .ingress = 1,
968 .egress = 0
969 };
970 struct rte_flow_error error;
971 struct rte_flow *flow;
972
973 add_flow_mark_rss_actions(&actions, flow_mark, netdev);
974
975 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
976 actions.actions, &error);
977
978 free_flow_actions(&actions);
979 return flow;
980 }
981
982 static void
983 add_count_action(struct flow_actions *actions)
984 {
985 struct rte_flow_action_count *count = xzalloc(sizeof *count);
986
987 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
988 }
989
990 static int
991 add_port_id_action(struct flow_actions *actions,
992 struct netdev *outdev)
993 {
994 struct rte_flow_action_port_id *port_id;
995 int outdev_id;
996
997 outdev_id = netdev_dpdk_get_port_id(outdev);
998 if (outdev_id < 0) {
999 return -1;
1000 }
1001 port_id = xzalloc(sizeof *port_id);
1002 port_id->id = outdev_id;
1003 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
1004 return 0;
1005 }
1006
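/* Translate an OVS output action into an rte_flow PORT_ID action towards the
 * DPDK port backing the destination netdev.  Fails if the destination cannot
 * be found or does not use the same flow offload API as 'netdev'. */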
1007 static int
1008 add_output_action(struct netdev *netdev,
1009 struct flow_actions *actions,
1010 const struct nlattr *nla)
1011 {
1012 struct netdev *outdev;
1013 odp_port_t port;
1014 int ret = 0;
1015
1016 port = nl_attr_get_odp_port(nla);
1017 outdev = netdev_ports_get(port, netdev->dpif_type);
1018 if (outdev == NULL) {
1019 VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port);
1020 return -1;
1021 }
1022 if (!netdev_flow_api_equals(netdev, outdev) ||
1023 add_port_id_action(actions, outdev)) {
1024 VLOG_DBG_RL(&rl, "%s: Output to port \'%s\' cannot be offloaded.",
1025 netdev_get_name(netdev), netdev_get_name(outdev));
1026 ret = -1;
1027 }
1028 netdev_close(outdev);
1029 return ret;
1030 }
1031
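/* Add a set-field action of type 'attr' carrying 'value'.  An all-zero
 * 'mask' means the field is not modified and nothing is added; a partial
 * mask is not supported by rte_flow and fails the offload; a full mask is
 * zeroed out so the caller can later verify all masks were consumed. */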
1032 static int
1033 add_set_flow_action__(struct flow_actions *actions,
1034 const void *value, void *mask,
1035 const size_t size, const int attr)
1036 {
1037 void *spec;
1038
1039 if (mask) {
1040 /* DPDK does not support partially masked set actions. In such
1041 * a case, fail the offload.
1042 */
1043 if (is_all_zeros(mask, size)) {
1044 return 0;
1045 }
1046 if (!is_all_ones(mask, size)) {
1047 VLOG_DBG_RL(&rl, "Partial mask is not supported");
1048 return -1;
1049 }
1050 }
1051
1052 spec = xzalloc(size);
1053 memcpy(spec, value, size);
1054 add_flow_action(actions, attr, spec);
1055
1056 /* Clear used mask for later checking. */
1057 if (mask) {
1058 memset(mask, 0, size);
1059 }
1060 return 0;
1061 }
1062
1063 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
1064 MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src));
1065 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
1066 MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst));
1067 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
1068 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src));
1069 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
1070 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
1071 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
1072 MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
1073 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
1074 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_src));
1075 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
1076 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_dst));
1077 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
1078 MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_hlimit));
1079 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1080 MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src));
1081 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1082 MEMBER_SIZEOF(struct ovs_key_tcp, tcp_dst));
1083 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1084 MEMBER_SIZEOF(struct ovs_key_udp, udp_src));
1085 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
1086 MEMBER_SIZEOF(struct ovs_key_udp, udp_dst));
1087
1088 static int
1089 parse_set_actions(struct flow_actions *actions,
1090 const struct nlattr *set_actions,
1091 const size_t set_actions_len,
1092 bool masked)
1093 {
1094 const struct nlattr *sa;
1095 unsigned int sleft;
1096
1097 #define add_set_flow_action(field, type) \
1098 if (add_set_flow_action__(actions, &key->field, \
1099 mask ? CONST_CAST(void *, &mask->field) : NULL, \
1100 sizeof key->field, type)) { \
1101 return -1; \
1102 }
1103
1104 NL_ATTR_FOR_EACH_UNSAFE (sa, sleft, set_actions, set_actions_len) {
1105 if (nl_attr_type(sa) == OVS_KEY_ATTR_ETHERNET) {
1106 const struct ovs_key_ethernet *key = nl_attr_get(sa);
1107 const struct ovs_key_ethernet *mask = masked ? key + 1 : NULL;
1108
1109 add_set_flow_action(eth_src, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC);
1110 add_set_flow_action(eth_dst, RTE_FLOW_ACTION_TYPE_SET_MAC_DST);
1111
1112 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1113 VLOG_DBG_RL(&rl, "Unsupported ETHERNET set action");
1114 return -1;
1115 }
1116 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV4) {
1117 const struct ovs_key_ipv4 *key = nl_attr_get(sa);
1118 const struct ovs_key_ipv4 *mask = masked ? key + 1 : NULL;
1119
1120 add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC);
1121 add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST);
1122 add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL);
1123
1124 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1125 VLOG_DBG_RL(&rl, "Unsupported IPv4 set action");
1126 return -1;
1127 }
1128 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV6) {
1129 const struct ovs_key_ipv6 *key = nl_attr_get(sa);
1130 const struct ovs_key_ipv6 *mask = masked ? key + 1 : NULL;
1131
1132 add_set_flow_action(ipv6_src, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC);
1133 add_set_flow_action(ipv6_dst, RTE_FLOW_ACTION_TYPE_SET_IPV6_DST);
1134 add_set_flow_action(ipv6_hlimit, RTE_FLOW_ACTION_TYPE_SET_TTL);
1135
1136 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1137 VLOG_DBG_RL(&rl, "Unsupported IPv6 set action");
1138 return -1;
1139 }
1140 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) {
1141 const struct ovs_key_tcp *key = nl_attr_get(sa);
1142 const struct ovs_key_tcp *mask = masked ? key + 1 : NULL;
1143
1144 add_set_flow_action(tcp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
1145 add_set_flow_action(tcp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
1146
1147 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1148 VLOG_DBG_RL(&rl, "Unsupported TCP set action");
1149 return -1;
1150 }
1151 } else if (nl_attr_type(sa) == OVS_KEY_ATTR_UDP) {
1152 const struct ovs_key_udp *key = nl_attr_get(sa);
1153 const struct ovs_key_udp *mask = masked ? key + 1 : NULL;
1154
1155 add_set_flow_action(udp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
1156 add_set_flow_action(udp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
1157
1158 if (mask && !is_all_zeros(mask, sizeof *mask)) {
1159 VLOG_DBG_RL(&rl, "Unsupported UDP set action");
1160 return -1;
1161 }
1162 } else {
1163 VLOG_DBG_RL(&rl,
1164 "Unsupported set action type %d", nl_attr_type(sa));
1165 return -1;
1166 }
1167 }
1168
1169 return 0;
1170 }
1171
1172 /* Maximum number of items in struct rte_flow_action_vxlan_encap.
1173 * ETH / IPv4(6) / UDP / VXLAN / END
1174 */
1175 #define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
1176
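/* Build a VXLAN_ENCAP action from the pre-built tunnel 'header'
 * (ETH / IPv4 or IPv6 / UDP / VXLAN).  The item specs point directly into
 * 'header'; returns -1 if the header does not describe an IP/UDP VXLAN
 * encapsulation. */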
1177 static int
1178 add_vxlan_encap_action(struct flow_actions *actions,
1179 const void *header)
1180 {
1181 const struct eth_header *eth;
1182 const struct udp_header *udp;
1183 struct vxlan_data {
1184 struct rte_flow_action_vxlan_encap conf;
1185 struct rte_flow_item items[ACTION_VXLAN_ENCAP_ITEMS_NUM];
1186 } *vxlan_data;
1187 BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
1188 const void *vxlan;
1189 const void *l3;
1190 const void *l4;
1191 int field;
1192
1193 vxlan_data = xzalloc(sizeof *vxlan_data);
1194 field = 0;
1195
1196 eth = header;
1197 /* Ethernet */
1198 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
1199 vxlan_data->items[field].spec = eth;
1200 vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
1201 field++;
1202
1203 l3 = eth + 1;
1204 /* IP */
1205 if (eth->eth_type == htons(ETH_TYPE_IP)) {
1206 /* IPv4 */
1207 const struct ip_header *ip = l3;
1208
1209 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
1210 vxlan_data->items[field].spec = ip;
1211 vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;
1212
1213 if (ip->ip_proto != IPPROTO_UDP) {
1214 goto err;
1215 }
1216 l4 = (ip + 1);
1217 } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
1218 const struct ovs_16aligned_ip6_hdr *ip6 = l3;
1219
1220 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
1221 vxlan_data->items[field].spec = ip6;
1222 vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;
1223
1224 if (ip6->ip6_nxt != IPPROTO_UDP) {
1225 goto err;
1226 }
1227 l4 = (ip6 + 1);
1228 } else {
1229 goto err;
1230 }
1231 field++;
1232
1233 udp = l4;
1234 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
1235 vxlan_data->items[field].spec = udp;
1236 vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
1237 field++;
1238
1239 vxlan = (udp + 1);
1240 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
1241 vxlan_data->items[field].spec = vxlan;
1242 vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
1243 field++;
1244
1245 vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;
1246
1247 vxlan_data->conf.definition = vxlan_data->items;
1248
1249 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);
1250
1251 return 0;
1252 err:
1253 free(vxlan_data);
1254 return -1;
1255 }
1256
1257 static int
1258 parse_vlan_push_action(struct flow_actions *actions,
1259 const struct ovs_action_push_vlan *vlan_push)
1260 {
1261 struct rte_flow_action_of_push_vlan *rte_push_vlan;
1262 struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp;
1263 struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid;
1264
1265 rte_push_vlan = xzalloc(sizeof *rte_push_vlan);
1266 rte_push_vlan->ethertype = vlan_push->vlan_tpid;
1267 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, rte_push_vlan);
1268
1269 rte_vlan_pcp = xzalloc(sizeof *rte_vlan_pcp);
1270 rte_vlan_pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci);
1271 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
1272 rte_vlan_pcp);
1273
1274 rte_vlan_vid = xzalloc(sizeof *rte_vlan_vid);
1275 rte_vlan_vid->vlan_vid = htons(vlan_tci_to_vid(vlan_push->vlan_tci));
1276 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
1277 rte_vlan_vid);
1278 return 0;
1279 }
1280
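/* Translate the nested actions of an OVS clone() action.  A tunnel push is
 * offloaded as VXLAN_ENCAP when possible and as RAW_ENCAP of the pre-built
 * header otherwise; an output is offloaded as a PORT_ID action.  Any other
 * nested action fails the offload. */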
1281 static int
1282 parse_clone_actions(struct netdev *netdev,
1283 struct flow_actions *actions,
1284 const struct nlattr *clone_actions,
1285 const size_t clone_actions_len)
1286 {
1287 const struct nlattr *ca;
1288 unsigned int cleft;
1289
1290 NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
1291 int clone_type = nl_attr_type(ca);
1292
1293 if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
1294 const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
1295 struct rte_flow_action_raw_encap *raw_encap;
1296
1297 if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
1298 !add_vxlan_encap_action(actions, tnl_push->header)) {
1299 continue;
1300 }
1301
1302 raw_encap = xzalloc(sizeof *raw_encap);
1303 raw_encap->data = (uint8_t *) tnl_push->header;
1304 raw_encap->preserve = NULL;
1305 raw_encap->size = tnl_push->header_len;
1306
1307 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
1308 raw_encap);
1309 } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
1310 if (add_output_action(netdev, actions, ca)) {
1311 return -1;
1312 }
1313 } else {
1314 VLOG_DBG_RL(&rl,
1315 "Unsupported nested action inside clone(), "
1316 "action type: %d", clone_type);
1317 return -1;
1318 }
1319 }
1320 return 0;
1321 }
1322
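/* Translate OVS datapath 'nl_actions' into rte_flow actions.  A COUNT action
 * is always added first so that flow statistics can be queried later, a
 * clone() is only accepted as the last action, and an END action is appended
 * on success.  Returns -1 for empty or unsupported action lists. */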
1323 static int
1324 parse_flow_actions(struct netdev *netdev,
1325 struct flow_actions *actions,
1326 struct nlattr *nl_actions,
1327 size_t nl_actions_len)
1328 {
1329 struct nlattr *nla;
1330 size_t left;
1331
1332 add_count_action(actions);
1333 NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
1334 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
1335 if (add_output_action(netdev, actions, nla)) {
1336 return -1;
1337 }
1338 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
1339 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_DROP, NULL);
1340 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET ||
1341 nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
1342 const struct nlattr *set_actions = nl_attr_get(nla);
1343 const size_t set_actions_len = nl_attr_get_size(nla);
1344 bool masked = nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED;
1345
1346 if (parse_set_actions(actions, set_actions, set_actions_len,
1347 masked)) {
1348 return -1;
1349 }
1350 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) {
1351 const struct ovs_action_push_vlan *vlan = nl_attr_get(nla);
1352
1353 if (parse_vlan_push_action(actions, vlan)) {
1354 return -1;
1355 }
1356 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) {
1357 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL);
1358 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE &&
1359 left <= NLA_ALIGN(nla->nla_len)) {
1360 const struct nlattr *clone_actions = nl_attr_get(nla);
1361 size_t clone_actions_len = nl_attr_get_size(nla);
1362
1363 if (parse_clone_actions(netdev, actions, clone_actions,
1364 clone_actions_len)) {
1365 return -1;
1366 }
1367 } else {
1368 VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
1369 return -1;
1370 }
1371 }
1372
1373 if (nl_actions_len == 0) {
1374 VLOG_DBG_RL(&rl, "No actions provided");
1375 return -1;
1376 }
1377
1378 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
1379 return 0;
1380 }
1381
1382 static struct rte_flow *
1383 netdev_offload_dpdk_actions(struct netdev *netdev,
1384 struct flow_patterns *patterns,
1385 struct nlattr *nl_actions,
1386 size_t actions_len)
1387 {
1388 const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
1389 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
1390 struct rte_flow *flow = NULL;
1391 struct rte_flow_error error;
1392 int ret;
1393
1394 ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
1395 if (ret) {
1396 goto out;
1397 }
1398 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
1399 actions.actions, &error);
1400 out:
1401 free_flow_actions(&actions);
1402 return flow;
1403 }
1404
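/* Offload a flow: translate the match into patterns, try to offload the full
 * set of actions, and fall back to a MARK + RSS (partial) offload if that
 * fails.  On success the resulting rte_flow is associated with 'ufid'. */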
1405 static struct ufid_to_rte_flow_data *
1406 netdev_offload_dpdk_add_flow(struct netdev *netdev,
1407 struct match *match,
1408 struct nlattr *nl_actions,
1409 size_t actions_len,
1410 const ovs_u128 *ufid,
1411 struct offload_info *info)
1412 {
1413 struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
1414 struct ufid_to_rte_flow_data *flows_data = NULL;
1415 bool actions_offloaded = true;
1416 struct rte_flow *flow;
1417
1418 if (parse_flow_match(&patterns, match)) {
1419 VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported",
1420 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid));
1421 goto out;
1422 }
1423
1424 flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
1425 actions_len);
1426 if (!flow) {
1427 /* If we failed to offload the rule actions, fall back to
1428 * MARK+RSS actions.
1429 */
1430 flow = netdev_offload_dpdk_mark_rss(&patterns, netdev,
1431 info->flow_mark);
1432 actions_offloaded = false;
1433 }
1434
1435 if (!flow) {
1436 goto out;
1437 }
1438 flows_data = ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
1439 VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT,
1440 netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid));
1441
1442 out:
1443 free_flow_patterns(&patterns);
1444 return flows_data;
1445 }
1446
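/* Destroy 'rte_flow' and, on success, drop its ufid association.  Failures
 * are logged and the rte_flow destroy return value is passed back to the
 * caller. */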
1447 static int
1448 netdev_offload_dpdk_destroy_flow(struct netdev *netdev,
1449 const ovs_u128 *ufid,
1450 struct rte_flow *rte_flow)
1451 {
1452 struct rte_flow_error error;
1453 int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
1454
1455 if (ret == 0) {
1456 ufid_to_rte_flow_disassociate(ufid);
1457 VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR
1458 " flow destroy %d ufid " UUID_FMT,
1459 netdev_get_name(netdev), (intptr_t) rte_flow,
1460 netdev_dpdk_get_port_id(netdev),
1461 UUID_ARGS((struct uuid *) ufid));
1462 } else {
1463 VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT,
1464 netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev),
1465 UUID_ARGS((struct uuid *) ufid));
1466 }
1467
1468 return ret;
1469 }
1470
1471 static int
1472 netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
1473 struct nlattr *actions, size_t actions_len,
1474 const ovs_u128 *ufid, struct offload_info *info,
1475 struct dpif_flow_stats *stats)
1476 {
1477 struct ufid_to_rte_flow_data *rte_flow_data;
1478 struct dpif_flow_stats old_stats;
1479 bool modification = false;
1480 int ret;
1481
1482 /*
1483 * If an old rte_flow exists, this is a flow modification.
1484 * Destroy the old rte_flow first, then add the new one,
1485 * carrying the old stats over to the newly created rule.
1486 */
1487 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1488 if (rte_flow_data && rte_flow_data->rte_flow) {
1489 old_stats = rte_flow_data->stats;
1490 modification = true;
1491 ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
1492 rte_flow_data->rte_flow);
1493 if (ret < 0) {
1494 return ret;
1495 }
1496 }
1497
1498 rte_flow_data = netdev_offload_dpdk_add_flow(netdev, match, actions,
1499 actions_len, ufid, info);
1500 if (!rte_flow_data) {
1501 return -1;
1502 }
1503 if (modification) {
1504 rte_flow_data->stats = old_stats;
1505 }
1506 if (stats) {
1507 *stats = rte_flow_data->stats;
1508 }
1509 return 0;
1510 }
1511
1512 static int
1513 netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
1514 struct dpif_flow_stats *stats)
1515 {
1516 struct ufid_to_rte_flow_data *rte_flow_data;
1517
1518 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1519 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1520 return -1;
1521 }
1522
1523 if (stats) {
1524 memset(stats, 0, sizeof *stats);
1525 }
1526 return netdev_offload_dpdk_destroy_flow(netdev, ufid,
1527 rte_flow_data->rte_flow);
1528 }
1529
1530 static int
1531 netdev_offload_dpdk_init_flow_api(struct netdev *netdev)
1532 {
1533 return netdev_dpdk_flow_api_supported(netdev) ? 0 : EOPNOTSUPP;
1534 }
1535
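/* Query statistics for the flow with 'ufid'.  Partially offloaded flows are
 * accounted by the software datapath ('ovs' dp_layer), so zeroed stats are
 * returned; fully offloaded flows query the COUNT action and accumulate the
 * result into the cached per-flow stats. */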
1536 static int
1537 netdev_offload_dpdk_flow_get(struct netdev *netdev,
1538 struct match *match OVS_UNUSED,
1539 struct nlattr **actions OVS_UNUSED,
1540 const ovs_u128 *ufid,
1541 struct dpif_flow_stats *stats,
1542 struct dpif_flow_attrs *attrs,
1543 struct ofpbuf *buf OVS_UNUSED)
1544 {
1545 struct rte_flow_query_count query = { .reset = 1 };
1546 struct ufid_to_rte_flow_data *rte_flow_data;
1547 struct rte_flow_error error;
1548 int ret = 0;
1549
1550 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1551 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1552 ret = -1;
1553 goto out;
1554 }
1555
1556 attrs->offloaded = true;
1557 if (!rte_flow_data->actions_offloaded) {
1558 attrs->dp_layer = "ovs";
1559 memset(stats, 0, sizeof *stats);
1560 goto out;
1561 }
1562 attrs->dp_layer = "dpdk";
1563 ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
1564 &query, &error);
1565 if (ret) {
1566 VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p",
1567 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid),
1568 rte_flow_data->rte_flow);
1569 goto out;
1570 }
1571 rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
1572 rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
1573 if (query.hits_set && query.hits) {
1574 rte_flow_data->stats.used = time_msec();
1575 }
1576 memcpy(stats, &rte_flow_data->stats, sizeof *stats);
1577 out:
1578 attrs->dp_extra_info = NULL;
1579 return ret;
1580 }
1581
1582 const struct netdev_flow_api netdev_offload_dpdk = {
1583 .type = "dpdk_flow_api",
1584 .flow_put = netdev_offload_dpdk_flow_put,
1585 .flow_del = netdev_offload_dpdk_flow_del,
1586 .init_flow_api = netdev_offload_dpdk_init_flow_api,
1587 .flow_get = netdev_offload_dpdk_flow_get,
1588 };