]> git.proxmox.com Git - mirror_ovs.git/blob - lib/netdev-offload-dpdk.c
netdev-offload-dpdk: Support offload of output action.
[mirror_ovs.git] / lib / netdev-offload-dpdk.c
1 /*
2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 * Copyright (c) 2019 Mellanox Technologies, Ltd.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 #include <config.h>
18
19 #include <rte_flow.h>
20
21 #include "cmap.h"
22 #include "dpif-netdev.h"
23 #include "netdev-offload-provider.h"
24 #include "netdev-provider.h"
25 #include "openvswitch/match.h"
26 #include "openvswitch/vlog.h"
27 #include "packets.h"
28 #include "uuid.h"
29
30 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
31 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
32
33 /* Thread-safety
34 * =============
35 *
36 * Below API is NOT thread safe in following terms:
37 *
38 * - The caller must be sure that none of these functions will be called
39 * simultaneously. Even for different 'netdev's.
40 *
41 * - The caller must be sure that 'netdev' will not be destructed/deallocated.
42 *
43 * - The caller must be sure that 'netdev' configuration will not be changed.
44 * For example, simultaneous call of 'netdev_reconfigure()' for the same
45 * 'netdev' is forbidden.
46 *
47 * For current implementation all above restrictions could be fulfilled by
48 * taking the datapath 'port_mutex' in lib/dpif-netdev.c. */
49
50 /*
51 * A mapping from ufid to dpdk rte_flow.
52 */
53 static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;
54
55 struct ufid_to_rte_flow_data {
56 struct cmap_node node;
57 ovs_u128 ufid;
58 struct rte_flow *rte_flow;
59 bool actions_offloaded;
60 struct dpif_flow_stats stats;
61 };
62
63 /* Find rte_flow with @ufid. */
64 static struct ufid_to_rte_flow_data *
65 ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
66 {
67 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
68 struct ufid_to_rte_flow_data *data;
69
70 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
71 if (ovs_u128_equals(*ufid, data->ufid)) {
72 return data;
73 }
74 }
75
76 return NULL;
77 }
78
79 static inline void
80 ufid_to_rte_flow_associate(const ovs_u128 *ufid,
81 struct rte_flow *rte_flow, bool actions_offloaded)
82 {
83 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
84 struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
85 struct ufid_to_rte_flow_data *data_prev;
86
87 /*
88 * We should not simply overwrite an existing rte flow.
89 * We should have deleted it first before re-adding it.
90 * Thus, if following assert triggers, something is wrong:
91 * the rte_flow is not destroyed.
92 */
93 data_prev = ufid_to_rte_flow_data_find(ufid);
94 if (data_prev) {
95 ovs_assert(data_prev->rte_flow == NULL);
96 }
97
98 data->ufid = *ufid;
99 data->rte_flow = rte_flow;
100 data->actions_offloaded = actions_offloaded;
101
102 cmap_insert(&ufid_to_rte_flow,
103 CONST_CAST(struct cmap_node *, &data->node), hash);
104 }
105
106 static inline void
107 ufid_to_rte_flow_disassociate(const ovs_u128 *ufid)
108 {
109 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
110 struct ufid_to_rte_flow_data *data;
111
112 CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
113 if (ovs_u128_equals(*ufid, data->ufid)) {
114 cmap_remove(&ufid_to_rte_flow,
115 CONST_CAST(struct cmap_node *, &data->node), hash);
116 ovsrcu_postpone(free, data);
117 return;
118 }
119 }
120
121 VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n",
122 UUID_ARGS((struct uuid *) ufid));
123 }
124
/*
 * Growable array of rte_flow items.
 *
 * 'current_max' tracks how many elements are currently allocated so that
 * appending does not need one xrealloc call per element: the array starts
 * with room for 8 elements and its capacity doubles whenever it is full.
 */
struct flow_patterns {
    struct rte_flow_item *items;    /* Array of 'current_max' elements. */
    int cnt;                        /* Number of elements in use. */
    int current_max;                /* Number of elements allocated. */
};
135
/* Growable array of rte_flow actions; grows the same way as
 * 'struct flow_patterns' (initial capacity 8, doubled when full). */
struct flow_actions {
    struct rte_flow_action *actions;    /* Array of 'current_max' elements. */
    int cnt;                            /* Number of elements in use. */
    int current_max;                    /* Number of elements allocated. */
};
141
142 static void
143 dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
144 {
145 ds_put_format(s,
146 " Attributes: "
147 "ingress=%d, egress=%d, prio=%d, group=%d, transfer=%d\n",
148 attr->ingress, attr->egress, attr->priority, attr->group,
149 attr->transfer);
150 }
151
152 static void
153 dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
154 {
155 if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
156 const struct rte_flow_item_eth *eth_spec = item->spec;
157 const struct rte_flow_item_eth *eth_mask = item->mask;
158
159 ds_put_cstr(s, "rte flow eth pattern:\n");
160 if (eth_spec) {
161 ds_put_format(s,
162 " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
163 "type=0x%04" PRIx16"\n",
164 ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
165 ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
166 ntohs(eth_spec->type));
167 } else {
168 ds_put_cstr(s, " Spec = null\n");
169 }
170 if (eth_mask) {
171 ds_put_format(s,
172 " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
173 "type=0x%04"PRIx16"\n",
174 ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes),
175 ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes),
176 ntohs(eth_mask->type));
177 } else {
178 ds_put_cstr(s, " Mask = null\n");
179 }
180 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
181 const struct rte_flow_item_vlan *vlan_spec = item->spec;
182 const struct rte_flow_item_vlan *vlan_mask = item->mask;
183
184 ds_put_cstr(s, "rte flow vlan pattern:\n");
185 if (vlan_spec) {
186 ds_put_format(s,
187 " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
188 ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci));
189 } else {
190 ds_put_cstr(s, " Spec = null\n");
191 }
192
193 if (vlan_mask) {
194 ds_put_format(s,
195 " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
196 ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci));
197 } else {
198 ds_put_cstr(s, " Mask = null\n");
199 }
200 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
201 const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
202 const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
203
204 ds_put_cstr(s, "rte flow ipv4 pattern:\n");
205 if (ipv4_spec) {
206 ds_put_format(s,
207 " Spec: tos=0x%"PRIx8", ttl=%"PRIx8
208 ", proto=0x%"PRIx8
209 ", src="IP_FMT", dst="IP_FMT"\n",
210 ipv4_spec->hdr.type_of_service,
211 ipv4_spec->hdr.time_to_live,
212 ipv4_spec->hdr.next_proto_id,
213 IP_ARGS(ipv4_spec->hdr.src_addr),
214 IP_ARGS(ipv4_spec->hdr.dst_addr));
215 } else {
216 ds_put_cstr(s, " Spec = null\n");
217 }
218 if (ipv4_mask) {
219 ds_put_format(s,
220 " Mask: tos=0x%"PRIx8", ttl=%"PRIx8
221 ", proto=0x%"PRIx8
222 ", src="IP_FMT", dst="IP_FMT"\n",
223 ipv4_mask->hdr.type_of_service,
224 ipv4_mask->hdr.time_to_live,
225 ipv4_mask->hdr.next_proto_id,
226 IP_ARGS(ipv4_mask->hdr.src_addr),
227 IP_ARGS(ipv4_mask->hdr.dst_addr));
228 } else {
229 ds_put_cstr(s, " Mask = null\n");
230 }
231 } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
232 const struct rte_flow_item_udp *udp_spec = item->spec;
233 const struct rte_flow_item_udp *udp_mask = item->mask;
234
235 ds_put_cstr(s, "rte flow udp pattern:\n");
236 if (udp_spec) {
237 ds_put_format(s,
238 " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n",
239 ntohs(udp_spec->hdr.src_port),
240 ntohs(udp_spec->hdr.dst_port));
241 } else {
242 ds_put_cstr(s, " Spec = null\n");
243 }
244 if (udp_mask) {
245 ds_put_format(s,
246 " Mask: src_port=0x%"PRIx16
247 ", dst_port=0x%"PRIx16"\n",
248 ntohs(udp_mask->hdr.src_port),
249 ntohs(udp_mask->hdr.dst_port));
250 } else {
251 ds_put_cstr(s, " Mask = null\n");
252 }
253 } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) {
254 const struct rte_flow_item_sctp *sctp_spec = item->spec;
255 const struct rte_flow_item_sctp *sctp_mask = item->mask;
256
257 ds_put_cstr(s, "rte flow sctp pattern:\n");
258 if (sctp_spec) {
259 ds_put_format(s,
260 " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n",
261 ntohs(sctp_spec->hdr.src_port),
262 ntohs(sctp_spec->hdr.dst_port));
263 } else {
264 ds_put_cstr(s, " Spec = null\n");
265 }
266 if (sctp_mask) {
267 ds_put_format(s,
268 " Mask: src_port=0x%"PRIx16
269 ", dst_port=0x%"PRIx16"\n",
270 ntohs(sctp_mask->hdr.src_port),
271 ntohs(sctp_mask->hdr.dst_port));
272 } else {
273 ds_put_cstr(s, " Mask = null\n");
274 }
275 } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) {
276 const struct rte_flow_item_icmp *icmp_spec = item->spec;
277 const struct rte_flow_item_icmp *icmp_mask = item->mask;
278
279 ds_put_cstr(s, "rte flow icmp pattern:\n");
280 if (icmp_spec) {
281 ds_put_format(s,
282 " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n",
283 icmp_spec->hdr.icmp_type,
284 icmp_spec->hdr.icmp_code);
285 } else {
286 ds_put_cstr(s, " Spec = null\n");
287 }
288 if (icmp_mask) {
289 ds_put_format(s,
290 " Mask: icmp_type=0x%"PRIx8
291 ", icmp_code=0x%"PRIx8"\n",
292 icmp_spec->hdr.icmp_type,
293 icmp_spec->hdr.icmp_code);
294 } else {
295 ds_put_cstr(s, " Mask = null\n");
296 }
297 } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
298 const struct rte_flow_item_tcp *tcp_spec = item->spec;
299 const struct rte_flow_item_tcp *tcp_mask = item->mask;
300
301 ds_put_cstr(s, "rte flow tcp pattern:\n");
302 if (tcp_spec) {
303 ds_put_format(s,
304 " Spec: src_port=%"PRIu16", dst_port=%"PRIu16
305 ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n",
306 ntohs(tcp_spec->hdr.src_port),
307 ntohs(tcp_spec->hdr.dst_port),
308 tcp_spec->hdr.data_off,
309 tcp_spec->hdr.tcp_flags);
310 } else {
311 ds_put_cstr(s, " Spec = null\n");
312 }
313 if (tcp_mask) {
314 ds_put_format(s,
315 " Mask: src_port=%"PRIx16", dst_port=%"PRIx16
316 ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n",
317 ntohs(tcp_mask->hdr.src_port),
318 ntohs(tcp_mask->hdr.dst_port),
319 tcp_mask->hdr.data_off,
320 tcp_mask->hdr.tcp_flags);
321 } else {
322 ds_put_cstr(s, " Mask = null\n");
323 }
324 } else {
325 ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
326 }
327 }
328
329 static void
330 dump_flow_action(struct ds *s, const struct rte_flow_action *actions)
331 {
332 if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
333 const struct rte_flow_action_mark *mark = actions->conf;
334
335 ds_put_cstr(s, "rte flow mark action:\n");
336 if (mark) {
337 ds_put_format(s, " Mark: id=%d\n", mark->id);
338 } else {
339 ds_put_cstr(s, " Mark = null\n");
340 }
341 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
342 const struct rte_flow_action_rss *rss = actions->conf;
343
344 ds_put_cstr(s, "rte flow RSS action:\n");
345 if (rss) {
346 ds_put_format(s, " RSS: queue_num=%d\n", rss->queue_num);
347 } else {
348 ds_put_cstr(s, " RSS = null\n");
349 }
350 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
351 const struct rte_flow_action_count *count = actions->conf;
352
353 ds_put_cstr(s, "rte flow count action:\n");
354 if (count) {
355 ds_put_format(s, " Count: shared=%d, id=%d\n", count->shared,
356 count->id);
357 } else {
358 ds_put_cstr(s, " Count = null\n");
359 }
360 } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
361 const struct rte_flow_action_port_id *port_id = actions->conf;
362
363 ds_put_cstr(s, "rte flow port-id action:\n");
364 if (port_id) {
365 ds_put_format(s, " Port-id: original=%d, id=%d\n",
366 port_id->original, port_id->id);
367 } else {
368 ds_put_cstr(s, " Port-id = null\n");
369 }
370 } else {
371 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
372 }
373 }
374
375 static struct ds *
376 dump_flow(struct ds *s,
377 const struct rte_flow_attr *attr,
378 const struct rte_flow_item *items,
379 const struct rte_flow_action *actions)
380 {
381 if (attr) {
382 dump_flow_attr(s, attr);
383 }
384 while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
385 dump_flow_pattern(s, items++);
386 }
387 while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
388 dump_flow_action(s, actions++);
389 }
390 return s;
391 }
392
393 static struct rte_flow *
394 netdev_offload_dpdk_flow_create(struct netdev *netdev,
395 const struct rte_flow_attr *attr,
396 const struct rte_flow_item *items,
397 const struct rte_flow_action *actions,
398 struct rte_flow_error *error)
399 {
400 struct rte_flow *flow;
401 struct ds s;
402
403 flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
404 if (flow) {
405 if (!VLOG_DROP_DBG(&rl)) {
406 ds_init(&s);
407 dump_flow(&s, attr, items, actions);
408 VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" created:\n%s",
409 netdev_get_name(netdev), (intptr_t) flow, ds_cstr(&s));
410 ds_destroy(&s);
411 }
412 } else {
413 enum vlog_level level = VLL_WARN;
414
415 if (error->type == RTE_FLOW_ERROR_TYPE_ACTION) {
416 level = VLL_DBG;
417 }
418 VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
419 netdev_get_name(netdev), error->type, error->message);
420 if (!vlog_should_drop(&this_module, level, &rl)) {
421 ds_init(&s);
422 dump_flow(&s, attr, items, actions);
423 VLOG_RL(&rl, level, "Failed flow:\n%s", ds_cstr(&s));
424 ds_destroy(&s);
425 }
426 }
427 return flow;
428 }
429
430 static void
431 add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
432 const void *spec, const void *mask)
433 {
434 int cnt = patterns->cnt;
435
436 if (cnt == 0) {
437 patterns->current_max = 8;
438 patterns->items = xcalloc(patterns->current_max,
439 sizeof *patterns->items);
440 } else if (cnt == patterns->current_max) {
441 patterns->current_max *= 2;
442 patterns->items = xrealloc(patterns->items, patterns->current_max *
443 sizeof *patterns->items);
444 }
445
446 patterns->items[cnt].type = type;
447 patterns->items[cnt].spec = spec;
448 patterns->items[cnt].mask = mask;
449 patterns->items[cnt].last = NULL;
450 patterns->cnt++;
451 }
452
453 static void
454 add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
455 const void *conf)
456 {
457 int cnt = actions->cnt;
458
459 if (cnt == 0) {
460 actions->current_max = 8;
461 actions->actions = xcalloc(actions->current_max,
462 sizeof *actions->actions);
463 } else if (cnt == actions->current_max) {
464 actions->current_max *= 2;
465 actions->actions = xrealloc(actions->actions, actions->current_max *
466 sizeof *actions->actions);
467 }
468
469 actions->actions[cnt].type = type;
470 actions->actions[cnt].conf = conf;
471 actions->cnt++;
472 }
473
474 static void
475 free_flow_patterns(struct flow_patterns *patterns)
476 {
477 int i;
478
479 for (i = 0; i < patterns->cnt; i++) {
480 if (patterns->items[i].spec) {
481 free(CONST_CAST(void *, patterns->items[i].spec));
482 }
483 if (patterns->items[i].mask) {
484 free(CONST_CAST(void *, patterns->items[i].mask));
485 }
486 }
487 free(patterns->items);
488 patterns->items = NULL;
489 patterns->cnt = 0;
490 }
491
492 static void
493 free_flow_actions(struct flow_actions *actions)
494 {
495 int i;
496
497 for (i = 0; i < actions->cnt; i++) {
498 if (actions->actions[i].conf) {
499 free(CONST_CAST(void *, actions->actions[i].conf));
500 }
501 }
502 free(actions->actions);
503 actions->actions = NULL;
504 actions->cnt = 0;
505 }
506
507 static int
508 parse_flow_match(struct flow_patterns *patterns,
509 const struct match *match)
510 {
511 uint8_t *next_proto_mask = NULL;
512 uint8_t proto = 0;
513
514 /* Eth */
515 if (!eth_addr_is_zero(match->wc.masks.dl_src) ||
516 !eth_addr_is_zero(match->wc.masks.dl_dst)) {
517 struct rte_flow_item_eth *spec, *mask;
518
519 spec = xzalloc(sizeof *spec);
520 mask = xzalloc(sizeof *mask);
521
522 memcpy(&spec->dst, &match->flow.dl_dst, sizeof spec->dst);
523 memcpy(&spec->src, &match->flow.dl_src, sizeof spec->src);
524 spec->type = match->flow.dl_type;
525
526 memcpy(&mask->dst, &match->wc.masks.dl_dst, sizeof mask->dst);
527 memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src);
528 mask->type = match->wc.masks.dl_type;
529
530 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
531 } else {
532 /*
533 * If user specifies a flow (like UDP flow) without L2 patterns,
534 * OVS will at least set the dl_type. Normally, it's enough to
535 * create an eth pattern just with it. Unluckily, some Intel's
536 * NIC (such as XL710) doesn't support that. Below is a workaround,
537 * which simply matches any L2 pkts.
538 */
539 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
540 }
541
542 /* VLAN */
543 if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
544 struct rte_flow_item_vlan *spec, *mask;
545
546 spec = xzalloc(sizeof *spec);
547 mask = xzalloc(sizeof *mask);
548
549 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
550 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
551
552 /* Match any protocols. */
553 mask->inner_type = 0;
554
555 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
556 }
557
558 /* IP v4 */
559 if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
560 struct rte_flow_item_ipv4 *spec, *mask;
561
562 spec = xzalloc(sizeof *spec);
563 mask = xzalloc(sizeof *mask);
564
565 spec->hdr.type_of_service = match->flow.nw_tos;
566 spec->hdr.time_to_live = match->flow.nw_ttl;
567 spec->hdr.next_proto_id = match->flow.nw_proto;
568 spec->hdr.src_addr = match->flow.nw_src;
569 spec->hdr.dst_addr = match->flow.nw_dst;
570
571 mask->hdr.type_of_service = match->wc.masks.nw_tos;
572 mask->hdr.time_to_live = match->wc.masks.nw_ttl;
573 mask->hdr.next_proto_id = match->wc.masks.nw_proto;
574 mask->hdr.src_addr = match->wc.masks.nw_src;
575 mask->hdr.dst_addr = match->wc.masks.nw_dst;
576
577 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask);
578
579 /* Save proto for L4 protocol setup. */
580 proto = spec->hdr.next_proto_id &
581 mask->hdr.next_proto_id;
582 next_proto_mask = &mask->hdr.next_proto_id;
583 }
584
585 if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP &&
586 proto != IPPROTO_SCTP && proto != IPPROTO_TCP &&
587 (match->wc.masks.tp_src ||
588 match->wc.masks.tp_dst ||
589 match->wc.masks.tcp_flags)) {
590 VLOG_DBG("L4 Protocol (%u) not supported", proto);
591 return -1;
592 }
593
594 if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) ||
595 (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) {
596 return -1;
597 }
598
599 if (proto == IPPROTO_TCP) {
600 struct rte_flow_item_tcp *spec, *mask;
601
602 spec = xzalloc(sizeof *spec);
603 mask = xzalloc(sizeof *mask);
604
605 spec->hdr.src_port = match->flow.tp_src;
606 spec->hdr.dst_port = match->flow.tp_dst;
607 spec->hdr.data_off = ntohs(match->flow.tcp_flags) >> 8;
608 spec->hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff;
609
610 mask->hdr.src_port = match->wc.masks.tp_src;
611 mask->hdr.dst_port = match->wc.masks.tp_dst;
612 mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8;
613 mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;
614
615 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask);
616
617 /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */
618 if (next_proto_mask) {
619 *next_proto_mask = 0;
620 }
621 } else if (proto == IPPROTO_UDP) {
622 struct rte_flow_item_udp *spec, *mask;
623
624 spec = xzalloc(sizeof *spec);
625 mask = xzalloc(sizeof *mask);
626
627 spec->hdr.src_port = match->flow.tp_src;
628 spec->hdr.dst_port = match->flow.tp_dst;
629
630 mask->hdr.src_port = match->wc.masks.tp_src;
631 mask->hdr.dst_port = match->wc.masks.tp_dst;
632
633 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask);
634
635 /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */
636 if (next_proto_mask) {
637 *next_proto_mask = 0;
638 }
639 } else if (proto == IPPROTO_SCTP) {
640 struct rte_flow_item_sctp *spec, *mask;
641
642 spec = xzalloc(sizeof *spec);
643 mask = xzalloc(sizeof *mask);
644
645 spec->hdr.src_port = match->flow.tp_src;
646 spec->hdr.dst_port = match->flow.tp_dst;
647
648 mask->hdr.src_port = match->wc.masks.tp_src;
649 mask->hdr.dst_port = match->wc.masks.tp_dst;
650
651 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask);
652
653 /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */
654 if (next_proto_mask) {
655 *next_proto_mask = 0;
656 }
657 } else if (proto == IPPROTO_ICMP) {
658 struct rte_flow_item_icmp *spec, *mask;
659
660 spec = xzalloc(sizeof *spec);
661 mask = xzalloc(sizeof *mask);
662
663 spec->hdr.icmp_type = (uint8_t) ntohs(match->flow.tp_src);
664 spec->hdr.icmp_code = (uint8_t) ntohs(match->flow.tp_dst);
665
666 mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src);
667 mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst);
668
669 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask);
670
671 /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */
672 if (next_proto_mask) {
673 *next_proto_mask = 0;
674 }
675 }
676
677 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL);
678
679 return 0;
680 }
681
682 static void
683 add_flow_mark_rss_actions(struct flow_actions *actions,
684 uint32_t flow_mark,
685 const struct netdev *netdev)
686 {
687 struct rte_flow_action_mark *mark;
688 struct action_rss_data {
689 struct rte_flow_action_rss conf;
690 uint16_t queue[0];
691 } *rss_data;
692 BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0);
693 int i;
694
695 mark = xzalloc(sizeof *mark);
696
697 mark->id = flow_mark;
698 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark);
699
700 rss_data = xmalloc(sizeof *rss_data +
701 netdev_n_rxq(netdev) * sizeof rss_data->queue[0]);
702 *rss_data = (struct action_rss_data) {
703 .conf = (struct rte_flow_action_rss) {
704 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
705 .level = 0,
706 .types = 0,
707 .queue_num = netdev_n_rxq(netdev),
708 .queue = rss_data->queue,
709 .key_len = 0,
710 .key = NULL
711 },
712 };
713
714 /* Override queue array with default. */
715 for (i = 0; i < netdev_n_rxq(netdev); i++) {
716 rss_data->queue[i] = i;
717 }
718
719 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
720 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
721 }
722
723 static struct rte_flow *
724 netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
725 struct netdev *netdev,
726 uint32_t flow_mark)
727 {
728 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
729 const struct rte_flow_attr flow_attr = {
730 .group = 0,
731 .priority = 0,
732 .ingress = 1,
733 .egress = 0
734 };
735 struct rte_flow_error error;
736 struct rte_flow *flow;
737
738 add_flow_mark_rss_actions(&actions, flow_mark, netdev);
739
740 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
741 actions.actions, &error);
742
743 free_flow_actions(&actions);
744 return flow;
745 }
746
747 static void
748 add_count_action(struct flow_actions *actions)
749 {
750 struct rte_flow_action_count *count = xzalloc(sizeof *count);
751
752 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
753 }
754
755 static int
756 add_port_id_action(struct flow_actions *actions,
757 struct netdev *outdev)
758 {
759 struct rte_flow_action_port_id *port_id;
760 int outdev_id;
761
762 outdev_id = netdev_dpdk_get_port_id(outdev);
763 if (outdev_id < 0) {
764 return -1;
765 }
766 port_id = xzalloc(sizeof *port_id);
767 port_id->id = outdev_id;
768 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
769 return 0;
770 }
771
772 static int
773 add_output_action(struct netdev *netdev,
774 struct flow_actions *actions,
775 const struct nlattr *nla,
776 struct offload_info *info)
777 {
778 struct netdev *outdev;
779 odp_port_t port;
780 int ret = 0;
781
782 port = nl_attr_get_odp_port(nla);
783 outdev = netdev_ports_get(port, info->dpif_class);
784 if (outdev == NULL) {
785 VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port);
786 return -1;
787 }
788 if (!netdev_flow_api_equals(netdev, outdev) ||
789 add_port_id_action(actions, outdev)) {
790 VLOG_DBG_RL(&rl, "%s: Output to port \'%s\' cannot be offloaded.",
791 netdev_get_name(netdev), netdev_get_name(outdev));
792 ret = -1;
793 }
794 netdev_close(outdev);
795 return ret;
796 }
797
798 static int
799 parse_flow_actions(struct netdev *netdev,
800 struct flow_actions *actions,
801 struct nlattr *nl_actions,
802 size_t nl_actions_len,
803 struct offload_info *info)
804 {
805 struct nlattr *nla;
806 size_t left;
807
808 add_count_action(actions);
809 NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
810 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
811 if (add_output_action(netdev, actions, nla, info)) {
812 return -1;
813 }
814 } else {
815 VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
816 return -1;
817 }
818 }
819
820 if (nl_actions_len == 0) {
821 VLOG_DBG_RL(&rl, "No actions provided");
822 return -1;
823 }
824
825 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
826 return 0;
827 }
828
829 static struct rte_flow *
830 netdev_offload_dpdk_actions(struct netdev *netdev,
831 struct flow_patterns *patterns,
832 struct nlattr *nl_actions,
833 size_t actions_len,
834 struct offload_info *info)
835 {
836 const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
837 struct flow_actions actions = { .actions = NULL, .cnt = 0 };
838 struct rte_flow *flow = NULL;
839 struct rte_flow_error error;
840 int ret;
841
842 ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len, info);
843 if (ret) {
844 goto out;
845 }
846 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
847 actions.actions, &error);
848 out:
849 free_flow_actions(&actions);
850 return flow;
851 }
852
853 static int
854 netdev_offload_dpdk_add_flow(struct netdev *netdev,
855 const struct match *match,
856 struct nlattr *nl_actions,
857 size_t actions_len,
858 const ovs_u128 *ufid,
859 struct offload_info *info)
860 {
861 struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
862 bool actions_offloaded = true;
863 struct rte_flow *flow;
864 int ret = 0;
865
866 ret = parse_flow_match(&patterns, match);
867 if (ret) {
868 goto out;
869 }
870
871 flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
872 actions_len, info);
873 if (!flow) {
874 /* If we failed to offload the rule actions fallback to MARK+RSS
875 * actions.
876 */
877 flow = netdev_offload_dpdk_mark_rss(&patterns, netdev,
878 info->flow_mark);
879 actions_offloaded = false;
880 }
881
882 if (!flow) {
883 ret = -1;
884 goto out;
885 }
886 ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
887 VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n",
888 netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid));
889
890 out:
891 free_flow_patterns(&patterns);
892 return ret;
893 }
894
895 /*
896 * Check if any unsupported flow patterns are specified.
897 */
898 static int
899 netdev_offload_dpdk_validate_flow(const struct match *match)
900 {
901 struct match match_zero_wc;
902 const struct flow *masks = &match->wc.masks;
903
904 /* Create a wc-zeroed version of flow. */
905 match_init(&match_zero_wc, &match->flow, &match->wc);
906
907 if (!is_all_zeros(&match_zero_wc.flow.tunnel,
908 sizeof match_zero_wc.flow.tunnel)) {
909 goto err;
910 }
911
912 if (masks->metadata || masks->skb_priority ||
913 masks->pkt_mark || masks->dp_hash) {
914 goto err;
915 }
916
917 /* recirc id must be zero. */
918 if (match_zero_wc.flow.recirc_id) {
919 goto err;
920 }
921
922 if (masks->ct_state || masks->ct_nw_proto ||
923 masks->ct_zone || masks->ct_mark ||
924 !ovs_u128_is_zero(masks->ct_label)) {
925 goto err;
926 }
927
928 if (masks->conj_id || masks->actset_output) {
929 goto err;
930 }
931
932 /* Unsupported L2. */
933 if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) {
934 goto err;
935 }
936
937 /* Unsupported L3. */
938 if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst ||
939 !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) ||
940 !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) ||
941 !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) ||
942 !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) ||
943 !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) ||
944 !is_all_zeros(&masks->nsh, sizeof masks->nsh) ||
945 !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) ||
946 !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) {
947 goto err;
948 }
949
950 /* If fragmented, then don't HW accelerate - for now. */
951 if (match_zero_wc.flow.nw_frag) {
952 goto err;
953 }
954
955 /* Unsupported L4. */
956 if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) {
957 goto err;
958 }
959
960 return 0;
961
962 err:
963 VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols");
964 return -1;
965 }
966
967 static int
968 netdev_offload_dpdk_destroy_flow(struct netdev *netdev,
969 const ovs_u128 *ufid,
970 struct rte_flow *rte_flow)
971 {
972 struct rte_flow_error error;
973 int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
974
975 if (ret == 0) {
976 ufid_to_rte_flow_disassociate(ufid);
977 VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n",
978 netdev_get_name(netdev), rte_flow,
979 UUID_ARGS((struct uuid *)ufid));
980 } else {
981 VLOG_ERR("%s: Failed to destroy flow: %s (%u)\n",
982 netdev_get_name(netdev), error.message, error.type);
983 }
984
985 return ret;
986 }
987
988 static int
989 netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
990 struct nlattr *actions, size_t actions_len,
991 const ovs_u128 *ufid, struct offload_info *info,
992 struct dpif_flow_stats *stats)
993 {
994 struct ufid_to_rte_flow_data *rte_flow_data;
995 int ret;
996
997 /*
998 * If an old rte_flow exists, it means it's a flow modification.
999 * Here destroy the old rte flow first before adding a new one.
1000 */
1001 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1002 if (rte_flow_data && rte_flow_data->rte_flow) {
1003 ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
1004 rte_flow_data->rte_flow);
1005 if (ret < 0) {
1006 return ret;
1007 }
1008 }
1009
1010 ret = netdev_offload_dpdk_validate_flow(match);
1011 if (ret < 0) {
1012 return ret;
1013 }
1014
1015 if (stats) {
1016 memset(stats, 0, sizeof *stats);
1017 }
1018 return netdev_offload_dpdk_add_flow(netdev, match, actions,
1019 actions_len, ufid, info);
1020 }
1021
1022 static int
1023 netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
1024 struct dpif_flow_stats *stats)
1025 {
1026 struct ufid_to_rte_flow_data *rte_flow_data;
1027
1028 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1029 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1030 return -1;
1031 }
1032
1033 if (stats) {
1034 memset(stats, 0, sizeof *stats);
1035 }
1036 return netdev_offload_dpdk_destroy_flow(netdev, ufid,
1037 rte_flow_data->rte_flow);
1038 }
1039
/* 'init_flow_api' callback.  Returns 0 if netdev_dpdk_flow_api_supported()
 * accepts 'netdev', otherwise EOPNOTSUPP. */
static int
netdev_offload_dpdk_init_flow_api(struct netdev *netdev)
{
    if (netdev_dpdk_flow_api_supported(netdev)) {
        return 0;
    }
    return EOPNOTSUPP;
}
1045
1046 static int
1047 netdev_offload_dpdk_flow_get(struct netdev *netdev,
1048 struct match *match OVS_UNUSED,
1049 struct nlattr **actions OVS_UNUSED,
1050 const ovs_u128 *ufid,
1051 struct dpif_flow_stats *stats,
1052 struct dpif_flow_attrs *attrs,
1053 struct ofpbuf *buf OVS_UNUSED)
1054 {
1055 struct rte_flow_query_count query = { .reset = 1 };
1056 struct ufid_to_rte_flow_data *rte_flow_data;
1057 struct rte_flow_error error;
1058 int ret = 0;
1059
1060 rte_flow_data = ufid_to_rte_flow_data_find(ufid);
1061 if (!rte_flow_data || !rte_flow_data->rte_flow) {
1062 ret = -1;
1063 goto out;
1064 }
1065
1066 attrs->offloaded = true;
1067 if (!rte_flow_data->actions_offloaded) {
1068 attrs->dp_layer = "ovs";
1069 memset(stats, 0, sizeof *stats);
1070 goto out;
1071 }
1072 attrs->dp_layer = "dpdk";
1073 ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
1074 &query, &error);
1075 if (ret) {
1076 VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p\n",
1077 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid),
1078 rte_flow_data->rte_flow);
1079 goto out;
1080 }
1081 rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
1082 rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
1083 if (query.hits_set && query.hits) {
1084 rte_flow_data->stats.used = time_msec();
1085 }
1086 memcpy(stats, &rte_flow_data->stats, sizeof *stats);
1087 out:
1088 return ret;
1089 }
1090
1091 const struct netdev_flow_api netdev_offload_dpdk = {
1092 .type = "dpdk_flow_api",
1093 .flow_put = netdev_offload_dpdk_flow_put,
1094 .flow_del = netdev_offload_dpdk_flow_del,
1095 .init_flow_api = netdev_offload_dpdk_init_flow_api,
1096 .flow_get = netdev_offload_dpdk_flow_get,
1097 };