/*
 * Copyright (c) 2016 Mellanox Technologies, Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include <errno.h>
#include <linux/if_ether.h>

#include "dpif.h"
#include "hash.h"
#include "openvswitch/hmap.h"
#include "openvswitch/match.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/thread.h"
#include "openvswitch/types.h"
#include "openvswitch/util.h"
#include "openvswitch/vlog.h"
#include "netdev-linux.h"
#include "netdev-offload-provider.h"
#include "netdev-provider.h"
#include "netlink.h"
#include "netlink-socket.h"
#include "odp-netlink.h"
#include "odp-util.h"
#include "tc.h"
#include "unaligned.h"
#include "util.h"
#include "dpif-provider.h"

VLOG_DEFINE_THIS_MODULE(netdev_offload_tc);

static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);

static struct hmap ufid_to_tc = HMAP_INITIALIZER(&ufid_to_tc);
static struct hmap tc_to_ufid = HMAP_INITIALIZER(&tc_to_ufid);
static bool multi_mask_per_prio = false;
static bool block_support = false;

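/* Describes one rewritable packet field: its offset and size within the OVS
 * key attribute struct it belongs to, and the matching offset within
 * struct tc_flower_key. */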
struct netlink_field {
    int offset;
    int flower_offset;
    int size;
};

static bool
is_internal_port(const char *type)
{
    return !strcmp(type, "internal");
}

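/* Internal ports (e.g., the bridge device itself) receive the packets to be
 * offloaded on transmit, so their tc rules are attached to the egress hook;
 * all other ports use the ingress hook. */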
static enum tc_qdisc_hook
get_tc_qdisc_hook(struct netdev *netdev)
{
    return is_internal_port(netdev_get_type(netdev)) ? TC_EGRESS : TC_INGRESS;
}

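/* Maps each supported OVS_KEY_ATTR_* set(-masked) action to the tc_flower
 * rewrite fields it touches.  The map is used in both directions: to fill
 * the flower rewrite key/mask when installing a flow
 * (parse_put_flow_set_masked_action()) and to rebuild
 * OVS_ACTION_ATTR_SET_MASKED netlink actions when dumping a flower rule
 * back (parse_flower_rewrite_to_netlink_action()). */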
static struct netlink_field set_flower_map[][4] = {
    [OVS_KEY_ATTR_IPV4] = {
        { offsetof(struct ovs_key_ipv4, ipv4_src),
          offsetof(struct tc_flower_key, ipv4.ipv4_src),
          MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
        },
        { offsetof(struct ovs_key_ipv4, ipv4_dst),
          offsetof(struct tc_flower_key, ipv4.ipv4_dst),
          MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
        },
        { offsetof(struct ovs_key_ipv4, ipv4_ttl),
          offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
          MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
        },
        { offsetof(struct ovs_key_ipv4, ipv4_tos),
          offsetof(struct tc_flower_key, ipv4.rewrite_tos),
          MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_tos)
        },
    },
    [OVS_KEY_ATTR_IPV6] = {
        { offsetof(struct ovs_key_ipv6, ipv6_src),
          offsetof(struct tc_flower_key, ipv6.ipv6_src),
          MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src)
        },
        { offsetof(struct ovs_key_ipv6, ipv6_dst),
          offsetof(struct tc_flower_key, ipv6.ipv6_dst),
          MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst)
        },
        { offsetof(struct ovs_key_ipv6, ipv6_hlimit),
          offsetof(struct tc_flower_key, ipv6.rewrite_hlimit),
          MEMBER_SIZEOF(struct tc_flower_key, ipv6.rewrite_hlimit)
        },
        { offsetof(struct ovs_key_ipv6, ipv6_tclass),
          offsetof(struct tc_flower_key, ipv6.rewrite_tclass),
          MEMBER_SIZEOF(struct tc_flower_key, ipv6.rewrite_tclass)
        },
    },
    [OVS_KEY_ATTR_ETHERNET] = {
        { offsetof(struct ovs_key_ethernet, eth_src),
          offsetof(struct tc_flower_key, src_mac),
          MEMBER_SIZEOF(struct tc_flower_key, src_mac)
        },
        { offsetof(struct ovs_key_ethernet, eth_dst),
          offsetof(struct tc_flower_key, dst_mac),
          MEMBER_SIZEOF(struct tc_flower_key, dst_mac)
        },
    },
    [OVS_KEY_ATTR_ETHERTYPE] = {
        { 0,
          offsetof(struct tc_flower_key, eth_type),
          MEMBER_SIZEOF(struct tc_flower_key, eth_type)
        },
    },
    [OVS_KEY_ATTR_TCP] = {
        { offsetof(struct ovs_key_tcp, tcp_src),
          offsetof(struct tc_flower_key, tcp_src),
          MEMBER_SIZEOF(struct tc_flower_key, tcp_src)
        },
        { offsetof(struct ovs_key_tcp, tcp_dst),
          offsetof(struct tc_flower_key, tcp_dst),
          MEMBER_SIZEOF(struct tc_flower_key, tcp_dst)
        },
    },
    [OVS_KEY_ATTR_UDP] = {
        { offsetof(struct ovs_key_udp, udp_src),
          offsetof(struct tc_flower_key, udp_src),
          MEMBER_SIZEOF(struct tc_flower_key, udp_src)
        },
        { offsetof(struct ovs_key_udp, udp_dst),
          offsetof(struct tc_flower_key, udp_dst),
          MEMBER_SIZEOF(struct tc_flower_key, udp_dst)
        },
    },
};

static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;

/**
 * struct ufid_tc_data - data entry for ufid-tc hashmaps.
 * @ufid_to_tc_node: Element in @ufid_to_tc hash table by ufid key.
 * @tc_to_ufid_node: Element in @tc_to_ufid hash table by tcf_id key.
 * @ufid: ufid assigned to the flow
 * @id: tc filter id (tcf_id)
 * @netdev: netdev associated with the tc rule
 */
struct ufid_tc_data {
    struct hmap_node ufid_to_tc_node;
    struct hmap_node tc_to_ufid_node;
    ovs_u128 ufid;
    struct tcf_id id;
    struct netdev *netdev;
};

static void
del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
{
    size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_tc_data *data;

    HMAP_FOR_EACH_WITH_HASH (data, ufid_to_tc_node, ufid_hash, &ufid_to_tc) {
        if (ovs_u128_equals(*ufid, data->ufid)) {
            break;
        }
    }

    if (!data) {
        return;
    }

    hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
    hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
    netdev_close(data->netdev);
    free(data);
}

/* Remove matching ufid entry from ufid-tc hashmaps. */
static void
del_ufid_tc_mapping(const ovs_u128 *ufid)
{
    ovs_mutex_lock(&ufid_lock);
    del_ufid_tc_mapping_unlocked(ufid);
    ovs_mutex_unlock(&ufid_lock);
}

/* Wrapper function that deletes a tc filter together with its ufid-tc
 * mapping.  The mapping is removed even if deleting the filter fails. */
static int
del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid)
{
    int err;

    err = tc_del_filter(id);
    del_ufid_tc_mapping(ufid);
    return err;
}

/* Add a ufid entry to both the ufid_to_tc and tc_to_ufid hashmaps. */
static void
add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
                    struct tcf_id *id)
{
    struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
    size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
    size_t tc_hash;

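    /* The tcf_id hash below must be computed exactly as in find_ufid(),
     * which performs the reverse (tcf_id -> ufid) lookup. */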
    tc_hash = hash_int(hash_int(id->prio, id->handle), id->ifindex);
    tc_hash = hash_int(id->chain, tc_hash);

    new_data->ufid = *ufid;
    new_data->id = *id;
    new_data->netdev = netdev_ref(netdev);

    ovs_mutex_lock(&ufid_lock);
    hmap_insert(&ufid_to_tc, &new_data->ufid_to_tc_node, ufid_hash);
    hmap_insert(&tc_to_ufid, &new_data->tc_to_ufid_node, tc_hash);
    ovs_mutex_unlock(&ufid_lock);
}

/* Get the tc filter id from the ufid_to_tc hashmap.
 *
 * Returns 0 on success and fills in 'id'; returns ENOENT if no entry
 * exists for 'ufid'. */
static int
get_ufid_tc_mapping(const ovs_u128 *ufid, struct tcf_id *id)
{
    size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_tc_data *data;

    ovs_mutex_lock(&ufid_lock);
    HMAP_FOR_EACH_WITH_HASH (data, ufid_to_tc_node, ufid_hash, &ufid_to_tc) {
        if (ovs_u128_equals(*ufid, data->ufid)) {
            *id = data->id;
            ovs_mutex_unlock(&ufid_lock);
            return 0;
        }
    }
    ovs_mutex_unlock(&ufid_lock);

    return ENOENT;
}

/* Find the ufid entry in the tc_to_ufid hashmap that matches tcf_id 'id'
 * and 'netdev'.  The result is saved in 'ufid'.
 *
 * Returns true on success. */
static bool
find_ufid(struct netdev *netdev, struct tcf_id *id, ovs_u128 *ufid)
{
    struct ufid_tc_data *data;
    size_t tc_hash;

    tc_hash = hash_int(hash_int(id->prio, id->handle), id->ifindex);
    tc_hash = hash_int(id->chain, tc_hash);

    ovs_mutex_lock(&ufid_lock);
    HMAP_FOR_EACH_WITH_HASH (data, tc_to_ufid_node, tc_hash, &tc_to_ufid) {
        if (netdev == data->netdev && is_tcf_id_eq(&data->id, id)) {
            *ufid = data->ufid;
            break;
        }
    }
    ovs_mutex_unlock(&ufid_lock);

    return (data != NULL);
}

struct prio_map_data {
    struct hmap_node node;
    struct tc_flower_key mask;
    ovs_be16 protocol;
    uint16_t prio;
};

/* Get a free prio for a tc flower filter.
 * If a prio is already allocated for this mask/eth_type combination, return
 * it; otherwise assign a fresh one.  For example, two flows that differ
 * only in their IP addresses share a mask and therefore a prio.
 *
 * Returns the prio on success or 0 if we are out of prios. */
static uint16_t
get_prio_for_tc_flower(struct tc_flower *flower)
{
    static struct hmap prios = HMAP_INITIALIZER(&prios);
    static struct ovs_mutex prios_lock = OVS_MUTEX_INITIALIZER;
    static uint16_t last_prio = TC_RESERVED_PRIORITY_MAX;
    size_t key_len = sizeof(struct tc_flower_key);
    size_t hash = hash_int((OVS_FORCE uint32_t) flower->key.eth_type, 0);
    struct prio_map_data *data;
    struct prio_map_data *new_data;

    if (!multi_mask_per_prio) {
        hash = hash_bytes(&flower->mask, key_len, hash);
    }

    /* We can reuse the same prio for the same mask/eth_type combination, but
     * otherwise need a different one: the flower classifier rejects reusing
     * a prio for a different mask unless multiple masks per prio are
     * supported. */
    ovs_mutex_lock(&prios_lock);
    HMAP_FOR_EACH_WITH_HASH (data, node, hash, &prios) {
        if ((multi_mask_per_prio
             || !memcmp(&flower->mask, &data->mask, key_len))
            && data->protocol == flower->key.eth_type) {
            ovs_mutex_unlock(&prios_lock);
            return data->prio;
        }
    }

    if (last_prio == UINT16_MAX) {
        /* last_prio can overflow if there are many different kinds of flows,
         * which shouldn't happen organically. */
        ovs_mutex_unlock(&prios_lock);
        return 0;
    }

    new_data = xzalloc(sizeof *new_data);
    memcpy(&new_data->mask, &flower->mask, key_len);
    new_data->prio = ++last_prio;
    new_data->protocol = flower->key.eth_type;
    hmap_insert(&prios, &new_data->node, hash);
    ovs_mutex_unlock(&prios_lock);

    return new_data->prio;
}

static uint32_t
get_block_id_from_netdev(struct netdev *netdev)
{
    if (block_support) {
        return netdev_get_block_id(netdev);
    }

    return 0;
}

static int
netdev_tc_flow_flush(struct netdev *netdev)
{
    struct ufid_tc_data *data, *next;
    int err;

    ovs_mutex_lock(&ufid_lock);
    HMAP_FOR_EACH_SAFE (data, next, tc_to_ufid_node, &tc_to_ufid) {
        if (data->netdev != netdev) {
            continue;
        }

        err = tc_del_filter(&data->id);
        if (!err) {
            del_ufid_tc_mapping_unlocked(&data->ufid);
        }
    }
    ovs_mutex_unlock(&ufid_lock);

    return 0;
}

static int
netdev_tc_flow_dump_create(struct netdev *netdev,
                           struct netdev_flow_dump **dump_out,
                           bool terse)
{
    enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev);
    struct netdev_flow_dump *dump;
    uint32_t block_id = 0;
    struct tcf_id id;
    int prio = 0;
    int ifindex;

    ifindex = netdev_get_ifindex(netdev);
    if (ifindex < 0) {
        VLOG_ERR_RL(&error_rl, "dump_create: failed to get ifindex for %s: %s",
                    netdev_get_name(netdev), ovs_strerror(-ifindex));
        return -ifindex;
    }

    block_id = get_block_id_from_netdev(netdev);
    dump = xzalloc(sizeof *dump);
    dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
    dump->netdev = netdev_ref(netdev);
    dump->terse = terse;

    id = tc_make_tcf_id(ifindex, block_id, prio, hook);
    tc_dump_flower_start(&id, dump->nl_dump, terse);

    *dump_out = dump;

    return 0;
}

static int
netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
{
    nl_dump_done(dump->nl_dump);
    netdev_close(dump->netdev);
    free(dump->nl_dump);
    free(dump);
    return 0;
}

static void
parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
                                       struct tc_flower *flower)
{
    char *mask = (char *) &flower->rewrite.mask;
    char *data = (char *) &flower->rewrite.key;

    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
        char *put = NULL;
        size_t nested = 0;
        int len = ovs_flow_key_attr_lens[type].len;

        if (len <= 0) {
            continue;
        }

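        /* An OVS_ACTION_ATTR_SET_MASKED payload is the value immediately
         * followed by its mask, each 'len' bytes long (hence len * 2 below);
         * only fields with a nonzero rewrite mask are emitted. */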
        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
            struct netlink_field *f = &set_flower_map[type][j];

            if (!f->size) {
                break;
            }

            if (!is_all_zeros(mask + f->flower_offset, f->size)) {
                if (!put) {
                    nested = nl_msg_start_nested(buf,
                                                 OVS_ACTION_ATTR_SET_MASKED);
                    put = nl_msg_put_unspec_zero(buf, type, len * 2);
                }

                memcpy(put + f->offset, data + f->flower_offset, f->size);
                memcpy(put + len + f->offset,
                       mask + f->flower_offset, f->size);
            }
        }

        if (put) {
            nl_msg_end_nested(buf, nested);
        }
    }
}

static void parse_tc_flower_geneve_opts(struct tc_action *action,
                                        struct ofpbuf *buf)
{
    int tun_opt_len = action->encap.data.present.len;
    size_t geneve_off;
    int idx = 0;

    if (!tun_opt_len) {
        return;
    }

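    /* Each geneve option is a 4-byte header plus opt->length 4-byte words
     * of data; 'idx' advances in 4-byte units while 'tun_opt_len' counts
     * bytes. */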
    geneve_off = nl_msg_start_nested(buf, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
    while (tun_opt_len) {
        struct geneve_opt *opt;

        opt = &action->encap.data.opts.gnv[idx];
        nl_msg_put(buf, opt, sizeof(struct geneve_opt) + opt->length * 4);
        idx += sizeof(struct geneve_opt) / 4 + opt->length;
        tun_opt_len -= sizeof(struct geneve_opt) + opt->length * 4;
    }
    nl_msg_end_nested(buf, geneve_off);
}

static void
flower_tun_opt_to_match(struct match *match, struct tc_flower *flower)
{
    struct geneve_opt *opt, *opt_mask;
    int len, cnt = 0;

    memcpy(match->flow.tunnel.metadata.opts.gnv,
           flower->key.tunnel.metadata.opts.gnv,
           flower->key.tunnel.metadata.present.len);
    match->flow.tunnel.metadata.present.len =
        flower->key.tunnel.metadata.present.len;
    match->flow.tunnel.flags |= FLOW_TNL_F_UDPIF;
    memcpy(match->wc.masks.tunnel.metadata.opts.gnv,
           flower->mask.tunnel.metadata.opts.gnv,
           flower->mask.tunnel.metadata.present.len);

    len = flower->key.tunnel.metadata.present.len;
    while (len) {
        opt = &match->flow.tunnel.metadata.opts.gnv[cnt];
        opt_mask = &match->wc.masks.tunnel.metadata.opts.gnv[cnt];

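        /* The option length (a 5-bit field, hence 0x1f) determines how the
         * options are walked, so it must be matched exactly. */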
        opt_mask->length = 0x1f;

        cnt += sizeof(struct geneve_opt) / 4 + opt->length;
        len -= sizeof(struct geneve_opt) + opt->length * 4;
    }

    match->wc.masks.tunnel.metadata.present.len =
        flower->mask.tunnel.metadata.present.len;
    match->wc.masks.tunnel.flags |= FLOW_TNL_F_UDPIF;
}

static void
parse_tc_flower_to_stats(struct tc_flower *flower,
                         struct dpif_flow_stats *stats)
{
    if (!stats) {
        return;
    }

    memset(stats, 0, sizeof *stats);
    stats->n_packets = get_32aligned_u64(&flower->stats.n_packets);
    stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes);
    stats->used = flower->lastused;
}

static void
parse_tc_flower_to_attrs(struct tc_flower *flower,
                         struct dpif_flow_attrs *attrs)
{
    attrs->offloaded = (flower->offloaded_state == TC_OFFLOADED_STATE_IN_HW ||
                        flower->offloaded_state ==
                        TC_OFFLOADED_STATE_UNDEFINED);
    attrs->dp_layer = "tc";
    attrs->dp_extra_info = NULL;
}

static int
parse_tc_flower_terse_to_match(struct tc_flower *flower,
                               struct match *match,
                               struct dpif_flow_stats *stats,
                               struct dpif_flow_attrs *attrs)
{
    match_init_catchall(match);

    parse_tc_flower_to_stats(flower, stats);
    parse_tc_flower_to_attrs(flower, attrs);

    return 0;
}

static int
parse_tc_flower_to_match(struct tc_flower *flower,
                         struct match *match,
                         struct nlattr **actions,
                         struct dpif_flow_stats *stats,
                         struct dpif_flow_attrs *attrs,
                         struct ofpbuf *buf,
                         bool terse)
{
    size_t act_off;
    struct tc_flower_key *key = &flower->key;
    struct tc_flower_key *mask = &flower->mask;
    odp_port_t outport = 0;
    struct tc_action *action;
    int i;

    if (terse) {
        return parse_tc_flower_terse_to_match(flower, match, stats, attrs);
    }

    ofpbuf_clear(buf);

    match_init_catchall(match);
    match_set_dl_src_masked(match, key->src_mac, mask->src_mac);
    match_set_dl_dst_masked(match, key->dst_mac, mask->dst_mac);

    if (eth_type_vlan(key->eth_type)) {
        match->flow.vlans[0].tpid = key->eth_type;
        match->wc.masks.vlans[0].tpid = OVS_BE16_MAX;
        match_set_dl_vlan(match, htons(key->vlan_id[0]), 0);
        match_set_dl_vlan_pcp(match, key->vlan_prio[0], 0);

        if (eth_type_vlan(key->encap_eth_type[0])) {
            match_set_dl_vlan(match, htons(key->vlan_id[1]), 1);
            match_set_dl_vlan_pcp(match, key->vlan_prio[1], 1);
            match_set_dl_type(match, key->encap_eth_type[1]);
            match->flow.vlans[1].tpid = key->encap_eth_type[0];
            match->wc.masks.vlans[1].tpid = OVS_BE16_MAX;
        } else {
            match_set_dl_type(match, key->encap_eth_type[0]);
        }
        flow_fix_vlan_tpid(&match->flow);
    } else if (eth_type_mpls(key->eth_type)) {
        match->flow.mpls_lse[0] = key->mpls_lse & mask->mpls_lse;
        match->wc.masks.mpls_lse[0] = mask->mpls_lse;
        match_set_dl_type(match, key->encap_eth_type[0]);
    } else if (key->eth_type == htons(ETH_TYPE_ARP)) {
        match_set_arp_sha_masked(match, key->arp.sha, mask->arp.sha);
        match_set_arp_tha_masked(match, key->arp.tha, mask->arp.tha);
        match_set_arp_spa_masked(match, key->arp.spa, mask->arp.spa);
        match_set_arp_tpa_masked(match, key->arp.tpa, mask->arp.tpa);
        match_set_arp_opcode_masked(match, key->arp.opcode,
                                    mask->arp.opcode);
        match_set_dl_type(match, key->eth_type);
    } else {
        match_set_dl_type(match, key->eth_type);
    }

    if (is_ip_any(&match->flow)) {
        if (key->ip_proto) {
            match_set_nw_proto(match, key->ip_proto);
        }

        match_set_nw_tos_masked(match, key->ip_tos, mask->ip_tos);
        match_set_nw_ttl_masked(match, key->ip_ttl, mask->ip_ttl);

        if (mask->flags) {
            uint8_t flags = 0;
            uint8_t flags_mask = 0;

            if (mask->flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT) {
                if (key->flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT) {
                    flags |= FLOW_NW_FRAG_ANY;
                }
                flags_mask |= FLOW_NW_FRAG_ANY;
            }

            if (mask->flags & TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST) {
                if (!(key->flags & TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST)) {
                    flags |= FLOW_NW_FRAG_LATER;
                }
                flags_mask |= FLOW_NW_FRAG_LATER;
            }

            match_set_nw_frag_masked(match, flags, flags_mask);
        }

        match_set_nw_src_masked(match, key->ipv4.ipv4_src, mask->ipv4.ipv4_src);
        match_set_nw_dst_masked(match, key->ipv4.ipv4_dst, mask->ipv4.ipv4_dst);

        match_set_ipv6_src_masked(match,
                                  &key->ipv6.ipv6_src, &mask->ipv6.ipv6_src);
        match_set_ipv6_dst_masked(match,
                                  &key->ipv6.ipv6_dst, &mask->ipv6.ipv6_dst);

        if (key->ip_proto == IPPROTO_TCP) {
            match_set_tp_dst_masked(match, key->tcp_dst, mask->tcp_dst);
            match_set_tp_src_masked(match, key->tcp_src, mask->tcp_src);
            match_set_tcp_flags_masked(match, key->tcp_flags, mask->tcp_flags);
        } else if (key->ip_proto == IPPROTO_UDP) {
            match_set_tp_dst_masked(match, key->udp_dst, mask->udp_dst);
            match_set_tp_src_masked(match, key->udp_src, mask->udp_src);
        } else if (key->ip_proto == IPPROTO_SCTP) {
            match_set_tp_dst_masked(match, key->sctp_dst, mask->sctp_dst);
            match_set_tp_src_masked(match, key->sctp_src, mask->sctp_src);
        }

        if (mask->ct_state) {
            uint8_t ct_statev = 0, ct_statem = 0;

            if (mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_NEW) {
                if (key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_NEW) {
                    ct_statev |= OVS_CS_F_NEW;
                }
                ct_statem |= OVS_CS_F_NEW;
            }

            if (mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) {
                if (key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) {
                    ct_statev |= OVS_CS_F_ESTABLISHED;
                }
                ct_statem |= OVS_CS_F_ESTABLISHED;
            }

            if (mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED) {
                if (key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED) {
                    ct_statev |= OVS_CS_F_TRACKED;
                }
                ct_statem |= OVS_CS_F_TRACKED;
            }

            match_set_ct_state_masked(match, ct_statev, ct_statem);
        }

        match_set_ct_zone_masked(match, key->ct_zone, mask->ct_zone);
        match_set_ct_mark_masked(match, key->ct_mark, mask->ct_mark);
        match_set_ct_label_masked(match, key->ct_label, mask->ct_label);
    }

    if (flower->tunnel) {
        if (flower->mask.tunnel.id) {
            match_set_tun_id(match, flower->key.tunnel.id);
            match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
        }
        if (flower->mask.tunnel.ipv4.ipv4_dst ||
            flower->mask.tunnel.ipv4.ipv4_src) {
            match_set_tun_dst_masked(match,
                                     flower->key.tunnel.ipv4.ipv4_dst,
                                     flower->mask.tunnel.ipv4.ipv4_dst);
            match_set_tun_src_masked(match,
                                     flower->key.tunnel.ipv4.ipv4_src,
                                     flower->mask.tunnel.ipv4.ipv4_src);
        } else if (ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_dst) ||
                   ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_src)) {
            match_set_tun_ipv6_dst_masked(match,
                                          &flower->key.tunnel.ipv6.ipv6_dst,
                                          &flower->mask.tunnel.ipv6.ipv6_dst);
            match_set_tun_ipv6_src_masked(match,
                                          &flower->key.tunnel.ipv6.ipv6_src,
                                          &flower->mask.tunnel.ipv6.ipv6_src);
        }
        if (flower->key.tunnel.tos) {
            match_set_tun_tos_masked(match, flower->key.tunnel.tos,
                                     flower->mask.tunnel.tos);
        }
        if (flower->key.tunnel.ttl) {
            match_set_tun_ttl_masked(match, flower->key.tunnel.ttl,
                                     flower->mask.tunnel.ttl);
        }
        if (flower->key.tunnel.tp_dst) {
            match_set_tun_tp_dst(match, flower->key.tunnel.tp_dst);
        }
        if (flower->key.tunnel.metadata.present.len) {
            flower_tun_opt_to_match(match, flower);
        }
    }

    act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS);
    {
        action = flower->actions;
        for (i = 0; i < flower->action_count; i++, action++) {
            switch (action->type) {
            case TC_ACT_VLAN_POP: {
                nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
            }
            break;
            case TC_ACT_VLAN_PUSH: {
                struct ovs_action_push_vlan *push;

                push = nl_msg_put_unspec_zero(buf, OVS_ACTION_ATTR_PUSH_VLAN,
                                              sizeof *push);
                push->vlan_tpid = action->vlan.vlan_push_tpid;
                push->vlan_tci = htons(action->vlan.vlan_push_id
                                       | (action->vlan.vlan_push_prio << 13)
                                       | VLAN_CFI);
            }
            break;
            case TC_ACT_MPLS_POP: {
                nl_msg_put_be16(buf, OVS_ACTION_ATTR_POP_MPLS,
                                action->mpls.proto);
            }
            break;
            case TC_ACT_MPLS_PUSH: {
                struct ovs_action_push_mpls *push;
                ovs_be32 mpls_lse = 0;

                flow_set_mpls_lse_label(&mpls_lse, action->mpls.label);
                flow_set_mpls_lse_tc(&mpls_lse, action->mpls.tc);
                flow_set_mpls_lse_ttl(&mpls_lse, action->mpls.ttl);
                flow_set_mpls_lse_bos(&mpls_lse, action->mpls.bos);

                push = nl_msg_put_unspec_zero(buf, OVS_ACTION_ATTR_PUSH_MPLS,
                                              sizeof *push);
                push->mpls_ethertype = action->mpls.proto;
                push->mpls_lse = mpls_lse;
            }
            break;
            case TC_ACT_MPLS_SET: {
                size_t set_offset = nl_msg_start_nested(buf,
                                                        OVS_ACTION_ATTR_SET);
                struct ovs_key_mpls *set_mpls;
                ovs_be32 mpls_lse = 0;

                flow_set_mpls_lse_label(&mpls_lse, action->mpls.label);
                flow_set_mpls_lse_tc(&mpls_lse, action->mpls.tc);
                flow_set_mpls_lse_ttl(&mpls_lse, action->mpls.ttl);
                flow_set_mpls_lse_bos(&mpls_lse, action->mpls.bos);

                set_mpls = nl_msg_put_unspec_zero(buf, OVS_KEY_ATTR_MPLS,
                                                  sizeof *set_mpls);
                set_mpls->mpls_lse = mpls_lse;
                nl_msg_end_nested(buf, set_offset);
            }
            break;
            case TC_ACT_PEDIT: {
                parse_flower_rewrite_to_netlink_action(buf, flower);
            }
            break;
            case TC_ACT_ENCAP: {
                size_t set_offset = nl_msg_start_nested(buf, OVS_ACTION_ATTR_SET);
                size_t tunnel_offset =
                    nl_msg_start_nested(buf, OVS_KEY_ATTR_TUNNEL);

                if (action->encap.id_present) {
                    nl_msg_put_be64(buf, OVS_TUNNEL_KEY_ATTR_ID, action->encap.id);
                }
                if (action->encap.ipv4.ipv4_src) {
                    nl_msg_put_be32(buf, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
                                    action->encap.ipv4.ipv4_src);
                }
                if (action->encap.ipv4.ipv4_dst) {
                    nl_msg_put_be32(buf, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
                                    action->encap.ipv4.ipv4_dst);
                }
                if (ipv6_addr_is_set(&action->encap.ipv6.ipv6_src)) {
                    nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
                                        &action->encap.ipv6.ipv6_src);
                }
                if (ipv6_addr_is_set(&action->encap.ipv6.ipv6_dst)) {
                    nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
                                        &action->encap.ipv6.ipv6_dst);
                }
                if (action->encap.tos) {
                    nl_msg_put_u8(buf, OVS_TUNNEL_KEY_ATTR_TOS,
                                  action->encap.tos);
                }
                if (action->encap.ttl) {
                    nl_msg_put_u8(buf, OVS_TUNNEL_KEY_ATTR_TTL,
                                  action->encap.ttl);
                }
                if (action->encap.tp_dst) {
                    nl_msg_put_be16(buf, OVS_TUNNEL_KEY_ATTR_TP_DST,
                                    action->encap.tp_dst);
                }
                if (!action->encap.no_csum) {
                    nl_msg_put_u8(buf, OVS_TUNNEL_KEY_ATTR_CSUM,
                                  !action->encap.no_csum);
                }

                parse_tc_flower_geneve_opts(action, buf);
                nl_msg_end_nested(buf, tunnel_offset);
                nl_msg_end_nested(buf, set_offset);
            }
            break;
            case TC_ACT_OUTPUT: {
                if (action->out.ifindex_out) {
                    outport =
                        netdev_ifindex_to_odp_port(action->out.ifindex_out);
                    if (!outport) {
                        return ENOENT;
                    }
                }
                nl_msg_put_u32(buf, OVS_ACTION_ATTR_OUTPUT, odp_to_u32(outport));
            }
            break;
            case TC_ACT_CT: {
                size_t ct_offset;

                if (action->ct.clear) {
                    nl_msg_put_flag(buf, OVS_ACTION_ATTR_CT_CLEAR);
                    break;
                }

                ct_offset = nl_msg_start_nested(buf, OVS_ACTION_ATTR_CT);

                if (action->ct.commit) {
                    nl_msg_put_flag(buf, OVS_CT_ATTR_COMMIT);
                }

                if (action->ct.zone) {
                    nl_msg_put_u16(buf, OVS_CT_ATTR_ZONE, action->ct.zone);
                }

                if (action->ct.mark_mask) {
                    uint32_t mark_and_mask[2] = { action->ct.mark,
                                                  action->ct.mark_mask };
                    nl_msg_put_unspec(buf, OVS_CT_ATTR_MARK, &mark_and_mask,
                                      sizeof mark_and_mask);
                }

                if (!ovs_u128_is_zero(action->ct.label_mask)) {
                    struct {
                        ovs_u128 key;
                        ovs_u128 mask;
                    } *ct_label;

                    ct_label = nl_msg_put_unspec_uninit(buf,
                                                        OVS_CT_ATTR_LABELS,
                                                        sizeof *ct_label);
                    ct_label->key = action->ct.label;
                    ct_label->mask = action->ct.label_mask;
                }

                if (action->ct.nat_type) {
                    size_t nat_offset = nl_msg_start_nested(buf,
                                                            OVS_CT_ATTR_NAT);

                    if (action->ct.nat_type == TC_NAT_SRC) {
                        nl_msg_put_flag(buf, OVS_NAT_ATTR_SRC);
                    } else if (action->ct.nat_type == TC_NAT_DST) {
                        nl_msg_put_flag(buf, OVS_NAT_ATTR_DST);
                    }

                    if (action->ct.range.ip_family == AF_INET) {
                        nl_msg_put_be32(buf, OVS_NAT_ATTR_IP_MIN,
                                        action->ct.range.ipv4.min);
                        nl_msg_put_be32(buf, OVS_NAT_ATTR_IP_MAX,
                                        action->ct.range.ipv4.max);
                    } else if (action->ct.range.ip_family == AF_INET6) {
                        nl_msg_put_in6_addr(buf, OVS_NAT_ATTR_IP_MIN,
                                            &action->ct.range.ipv6.min);
                        nl_msg_put_in6_addr(buf, OVS_NAT_ATTR_IP_MAX,
                                            &action->ct.range.ipv6.max);
                    }

                    if (action->ct.range.port.min) {
                        nl_msg_put_u16(buf, OVS_NAT_ATTR_PROTO_MIN,
                                       ntohs(action->ct.range.port.min));
                        if (action->ct.range.port.max) {
                            nl_msg_put_u16(buf, OVS_NAT_ATTR_PROTO_MAX,
                                           ntohs(action->ct.range.port.max));
                        }
                    }

                    nl_msg_end_nested(buf, nat_offset);
                }

                nl_msg_end_nested(buf, ct_offset);
            }
            break;
            case TC_ACT_GOTO: {
                nl_msg_put_u32(buf, OVS_ACTION_ATTR_RECIRC, action->chain);
            }
            break;
            }
        }
    }
    nl_msg_end_nested(buf, act_off);

    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));

    parse_tc_flower_to_stats(flower, stats);
    parse_tc_flower_to_attrs(flower, attrs);

    return 0;
}

static bool
netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
                         struct match *match,
                         struct nlattr **actions,
                         struct dpif_flow_stats *stats,
                         struct dpif_flow_attrs *attrs,
                         ovs_u128 *ufid,
                         struct ofpbuf *rbuffer,
                         struct ofpbuf *wbuffer)
{
    struct netdev *netdev = dump->netdev;
    struct ofpbuf nl_flow;
    struct tcf_id id;

    id = tc_make_tcf_id(netdev_get_ifindex(netdev),
                        get_block_id_from_netdev(netdev),
                        0, /* prio */
                        get_tc_qdisc_hook(netdev));

    while (nl_dump_next(dump->nl_dump, &nl_flow, rbuffer)) {
        struct tc_flower flower;

        if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower, dump->terse)) {
            continue;
        }

        if (parse_tc_flower_to_match(&flower, match, actions, stats, attrs,
                                     wbuffer, dump->terse)) {
            continue;
        }

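        /* Prefer the ufid stored in the tc action cookie, which
         * netdev_tc_flow_put() attaches to every filter it installs; fall
         * back to the tc_to_ufid map otherwise. */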
        if (flower.act_cookie.len) {
            *ufid = *((ovs_u128 *) flower.act_cookie.data);
        } else if (!find_ufid(netdev, &id, ufid)) {
            continue;
        }

        match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
        match->flow.in_port.odp_port = dump->port;
        match_set_recirc_id(match, id.chain);

        return true;
    }

    return false;
}

static int
parse_mpls_set_action(struct tc_flower *flower, struct tc_action *action,
                      const struct nlattr *set)
{
    const struct ovs_key_mpls *mpls_set = nl_attr_get(set);

    action->mpls.label = mpls_lse_to_label(mpls_set->mpls_lse);
    action->mpls.tc = mpls_lse_to_tc(mpls_set->mpls_lse);
    action->mpls.ttl = mpls_lse_to_ttl(mpls_set->mpls_lse);
    action->mpls.bos = mpls_lse_to_bos(mpls_set->mpls_lse);
    action->type = TC_ACT_MPLS_SET;
    flower->action_count++;

    return 0;
}

static int
parse_put_flow_nat_action(struct tc_action *action,
                          const struct nlattr *nat,
                          size_t nat_len)
{
    const struct nlattr *nat_attr;
    size_t nat_left;

    action->ct.nat_type = TC_NAT_RESTORE;
    NL_ATTR_FOR_EACH_UNSAFE (nat_attr, nat_left, nat, nat_len) {
        switch (nl_attr_type(nat_attr)) {
        case OVS_NAT_ATTR_SRC: {
            action->ct.nat_type = TC_NAT_SRC;
        }
        break;
        case OVS_NAT_ATTR_DST: {
            action->ct.nat_type = TC_NAT_DST;
        }
        break;
        case OVS_NAT_ATTR_IP_MIN: {
            if (nl_attr_get_size(nat_attr) == sizeof(ovs_be32)) {
                ovs_be32 addr = nl_attr_get_be32(nat_attr);

                action->ct.range.ipv4.min = addr;
                action->ct.range.ip_family = AF_INET;
            } else {
                struct in6_addr addr = nl_attr_get_in6_addr(nat_attr);

                action->ct.range.ipv6.min = addr;
                action->ct.range.ip_family = AF_INET6;
            }
        }
        break;
        case OVS_NAT_ATTR_IP_MAX: {
            if (nl_attr_get_size(nat_attr) == sizeof(ovs_be32)) {
                ovs_be32 addr = nl_attr_get_be32(nat_attr);

                action->ct.range.ipv4.max = addr;
                action->ct.range.ip_family = AF_INET;
            } else {
                struct in6_addr addr = nl_attr_get_in6_addr(nat_attr);

                action->ct.range.ipv6.max = addr;
                action->ct.range.ip_family = AF_INET6;
            }
        }
        break;
        case OVS_NAT_ATTR_PROTO_MIN: {
            action->ct.range.port.min = htons(nl_attr_get_u16(nat_attr));
        }
        break;
        case OVS_NAT_ATTR_PROTO_MAX: {
            action->ct.range.port.max = htons(nl_attr_get_u16(nat_attr));
        }
        break;
        }
    }
    return 0;
}

static int
parse_put_flow_ct_action(struct tc_flower *flower,
                         struct tc_action *action,
                         const struct nlattr *ct,
                         size_t ct_len)
{
    const struct nlattr *ct_attr;
    size_t ct_left;
    int err;

    NL_ATTR_FOR_EACH_UNSAFE (ct_attr, ct_left, ct, ct_len) {
        switch (nl_attr_type(ct_attr)) {
        case OVS_CT_ATTR_COMMIT: {
            action->ct.commit = true;
        }
        break;
        case OVS_CT_ATTR_ZONE: {
            action->ct.zone = nl_attr_get_u16(ct_attr);
        }
        break;
        case OVS_CT_ATTR_NAT: {
            const struct nlattr *nat = nl_attr_get(ct_attr);
            const size_t nat_len = nl_attr_get_size(ct_attr);

            err = parse_put_flow_nat_action(action, nat, nat_len);
            if (err) {
                return err;
            }
        }
        break;
        case OVS_CT_ATTR_MARK: {
            const struct {
                uint32_t key;
                uint32_t mask;
            } *ct_mark;

            ct_mark = nl_attr_get_unspec(ct_attr, sizeof *ct_mark);
            action->ct.mark = ct_mark->key;
            action->ct.mark_mask = ct_mark->mask;
        }
        break;
        case OVS_CT_ATTR_LABELS: {
            const struct {
                ovs_u128 key;
                ovs_u128 mask;
            } *ct_label;

            ct_label = nl_attr_get_unspec(ct_attr, sizeof *ct_label);
            action->ct.label = ct_label->key;
            action->ct.label_mask = ct_label->mask;
        }
        break;
        }
    }

    action->type = TC_ACT_CT;
    flower->action_count++;
    return 0;
}

static int
parse_put_flow_set_masked_action(struct tc_flower *flower,
                                 struct tc_action *action,
                                 const struct nlattr *set,
                                 size_t set_len,
                                 bool hasmask)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
    uint64_t set_stub[1024 / 8];
    struct ofpbuf set_buf = OFPBUF_STUB_INITIALIZER(set_stub);
    char *set_data, *set_mask;
    char *key = (char *) &flower->rewrite.key;
    char *mask = (char *) &flower->rewrite.mask;
    const struct nlattr *attr;
    int i, j, type;
    size_t size;

    /* Copy the attribute so that the mask of each consumed OVS key struct
     * member can be zeroed out below. */
    attr = ofpbuf_put(&set_buf, set, set_len);

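    /* An OVS_ACTION_ATTR_SET_MASKED attribute carries the value followed by
     * the mask, each taking half of the attribute's total size. */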
    type = nl_attr_type(attr);
    size = nl_attr_get_size(attr) / 2;
    set_data = CONST_CAST(char *, nl_attr_get(attr));
    set_mask = set_data + size;

    if (type >= ARRAY_SIZE(set_flower_map)
        || !set_flower_map[type][0].size) {
        VLOG_DBG_RL(&rl, "unsupported set action type: %d", type);
        ofpbuf_uninit(&set_buf);
        return EOPNOTSUPP;
    }

    for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
        struct netlink_field *f = &set_flower_map[type][i];

        if (!f->size) {
            break;
        }

        /* copy masked value */
        for (j = 0; j < f->size; j++) {
            char maskval = hasmask ? set_mask[f->offset + j] : 0xFF;

            key[f->flower_offset + j] = maskval & set_data[f->offset + j];
            mask[f->flower_offset + j] = maskval;
        }

        /* set its mask to 0 to show it's been used. */
        if (hasmask) {
            memset(set_mask + f->offset, 0, f->size);
        }
    }

    if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) {
        if (flower->rewrite.rewrite == false) {
            flower->rewrite.rewrite = true;
            action->type = TC_ACT_PEDIT;
            flower->action_count++;
        }
    }

    if (hasmask && !is_all_zeros(set_mask, size)) {
        VLOG_DBG_RL(&rl, "unsupported sub attribute of set action type %d",
                    type);
        ofpbuf_uninit(&set_buf);
        return EOPNOTSUPP;
    }

    ofpbuf_uninit(&set_buf);
    return 0;
}

static int
parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action,
                          const struct nlattr *set, size_t set_len)
{
    const struct nlattr *tunnel;
    const struct nlattr *tun_attr;
    size_t tun_left, tunnel_len;

    if (nl_attr_type(set) == OVS_KEY_ATTR_MPLS) {
        return parse_mpls_set_action(flower, action, set);
    }

    if (nl_attr_type(set) != OVS_KEY_ATTR_TUNNEL) {
        return parse_put_flow_set_masked_action(flower, action, set,
                                                set_len, false);
    }

    tunnel = nl_attr_get(set);
    tunnel_len = nl_attr_get_size(set);

    action->type = TC_ACT_ENCAP;
    action->encap.id_present = false;
    flower->action_count++;
    NL_ATTR_FOR_EACH_UNSAFE (tun_attr, tun_left, tunnel, tunnel_len) {
        switch (nl_attr_type(tun_attr)) {
        case OVS_TUNNEL_KEY_ATTR_ID: {
            action->encap.id = nl_attr_get_be64(tun_attr);
            action->encap.id_present = true;
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: {
            action->encap.ipv4.ipv4_src = nl_attr_get_be32(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_IPV4_DST: {
            action->encap.ipv4.ipv4_dst = nl_attr_get_be32(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_TOS: {
            action->encap.tos = nl_attr_get_u8(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_TTL: {
            action->encap.ttl = nl_attr_get_u8(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: {
            action->encap.ipv6.ipv6_src =
                nl_attr_get_in6_addr(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_IPV6_DST: {
            action->encap.ipv6.ipv6_dst =
                nl_attr_get_in6_addr(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_TP_SRC: {
            action->encap.tp_src = nl_attr_get_be16(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_TP_DST: {
            action->encap.tp_dst = nl_attr_get_be16(tun_attr);
        }
        break;
        case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: {
            memcpy(action->encap.data.opts.gnv, nl_attr_get(tun_attr),
                   nl_attr_get_size(tun_attr));
            action->encap.data.present.len = nl_attr_get_size(tun_attr);
        }
        break;
        }
    }

    return 0;
}

static int
test_key_and_mask(struct match *match)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
    const struct flow *key = &match->flow;
    struct flow *mask = &match->wc.masks;

    if (mask->pkt_mark) {
        VLOG_DBG_RL(&rl, "offloading attribute pkt_mark isn't supported");
        return EOPNOTSUPP;
    }

    if (mask->dp_hash) {
        VLOG_DBG_RL(&rl, "offloading attribute dp_hash isn't supported");
        return EOPNOTSUPP;
    }

    if (mask->conj_id) {
        VLOG_DBG_RL(&rl, "offloading attribute conj_id isn't supported");
        return EOPNOTSUPP;
    }

    if (mask->skb_priority) {
        VLOG_DBG_RL(&rl, "offloading attribute skb_priority isn't supported");
        return EOPNOTSUPP;
    }

    if (mask->actset_output) {
        VLOG_DBG_RL(&rl,
                    "offloading attribute actset_output isn't supported");
        return EOPNOTSUPP;
    }

    if (mask->packet_type && key->packet_type) {
        VLOG_DBG_RL(&rl, "offloading attribute packet_type isn't supported");
        return EOPNOTSUPP;
    }
    mask->packet_type = 0;

    for (int i = 0; i < FLOW_N_REGS; i++) {
        if (mask->regs[i]) {
            VLOG_DBG_RL(&rl,
                        "offloading attribute regs[%d] isn't supported", i);
            return EOPNOTSUPP;
        }
    }

    if (mask->metadata) {
        VLOG_DBG_RL(&rl, "offloading attribute metadata isn't supported");
        return EOPNOTSUPP;
    }

    if (mask->nw_tos) {
        VLOG_DBG_RL(&rl, "offloading attribute nw_tos isn't supported");
        return EOPNOTSUPP;
    }

    for (int i = 1; i < FLOW_MAX_MPLS_LABELS; i++) {
        if (mask->mpls_lse[i]) {
            VLOG_DBG_RL(&rl, "offloading multiple mpls_lses isn't supported");
            return EOPNOTSUPP;
        }
    }

    if (key->dl_type == htons(ETH_TYPE_IP) &&
        key->nw_proto == IPPROTO_ICMP) {
        if (mask->tp_src) {
            VLOG_DBG_RL(&rl,
                        "offloading attribute icmp_type isn't supported");
            return EOPNOTSUPP;
        }
        if (mask->tp_dst) {
            VLOG_DBG_RL(&rl,
                        "offloading attribute icmp_code isn't supported");
            return EOPNOTSUPP;
        }
    } else if (key->dl_type == htons(ETH_TYPE_IP) &&
               key->nw_proto == IPPROTO_IGMP) {
        if (mask->tp_src) {
            VLOG_DBG_RL(&rl,
                        "offloading attribute igmp_type isn't supported");
            return EOPNOTSUPP;
        }
        if (mask->tp_dst) {
            VLOG_DBG_RL(&rl,
                        "offloading attribute igmp_code isn't supported");
            return EOPNOTSUPP;
        }
    } else if (key->dl_type == htons(ETH_TYPE_IPV6) &&
               key->nw_proto == IPPROTO_ICMPV6) {
        if (mask->tp_src) {
            VLOG_DBG_RL(&rl,
                        "offloading attribute icmpv6_type isn't supported");
            return EOPNOTSUPP;
        }
        if (mask->tp_dst) {
            VLOG_DBG_RL(&rl,
                        "offloading attribute icmpv6_code isn't supported");
            return EOPNOTSUPP;
        }
    } else if (key->dl_type == htons(OFP_DL_TYPE_NOT_ETH_TYPE)) {
        VLOG_DBG_RL(&rl,
                    "offloading of non-ethernet packets isn't supported");
        return EOPNOTSUPP;
    }

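    /* netdev_tc_flow_put() clears each mask field as it is translated into
     * flower terms, so any bits still set at this point belong to a field
     * that cannot be offloaded. */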
    if (!is_all_zeros(mask, sizeof *mask)) {
        VLOG_DBG_RL(&rl, "offloading isn't supported, unknown attribute");
        return EOPNOTSUPP;
    }

    return 0;
}

static void
flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl *tnl,
                        const struct flow_tnl *tnl_mask)
{
    struct geneve_opt *opt, *opt_mask;
    int len, cnt = 0;

    memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
           tnl->metadata.present.len);
    flower->key.tunnel.metadata.present.len = tnl->metadata.present.len;

    memcpy(flower->mask.tunnel.metadata.opts.gnv, tnl_mask->metadata.opts.gnv,
           tnl->metadata.present.len);

    len = flower->key.tunnel.metadata.present.len;
    while (len) {
        opt = &flower->key.tunnel.metadata.opts.gnv[cnt];
        opt_mask = &flower->mask.tunnel.metadata.opts.gnv[cnt];

        opt_mask->length = opt->length;

        cnt += sizeof(struct geneve_opt) / 4 + opt->length;
        len -= sizeof(struct geneve_opt) + opt->length * 4;
    }

    flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
}

static int
netdev_tc_flow_put(struct netdev *netdev, struct match *match,
                   struct nlattr *actions, size_t actions_len,
                   const ovs_u128 *ufid, struct offload_info *info,
                   struct dpif_flow_stats *stats)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
    enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev);
    struct tc_flower flower;
    const struct flow *key = &match->flow;
    struct flow *mask = &match->wc.masks;
    const struct flow_tnl *tnl = &match->flow.tunnel;
    const struct flow_tnl *tnl_mask = &mask->tunnel;
    struct tc_action *action;
    bool recirc_act = false;
    uint32_t block_id = 0;
    struct nlattr *nla;
    struct tcf_id id;
    uint32_t chain;
    size_t left;
    int prio = 0;
    int ifindex;
    int err;

    ifindex = netdev_get_ifindex(netdev);
    if (ifindex < 0) {
        VLOG_ERR_RL(&error_rl, "flow_put: failed to get ifindex for %s: %s",
                    netdev_get_name(netdev), ovs_strerror(-ifindex));
        return -ifindex;
    }

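    /* As each field is translated into the flower key/mask below, the
     * corresponding bits are cleared in 'mask'; test_key_and_mask() later
     * rejects the flow if anything is left over. */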
    memset(&flower, 0, sizeof flower);

    chain = key->recirc_id;
    mask->recirc_id = 0;

    if (flow_tnl_dst_is_set(&key->tunnel) ||
        flow_tnl_src_is_set(&key->tunnel)) {
        VLOG_DBG_RL(&rl,
                    "tunnel: id %#" PRIx64 " src " IP_FMT
                    " dst " IP_FMT " tp_src %d tp_dst %d",
                    ntohll(tnl->tun_id),
                    IP_ARGS(tnl->ip_src), IP_ARGS(tnl->ip_dst),
                    ntohs(tnl->tp_src), ntohs(tnl->tp_dst));
        flower.key.tunnel.id = tnl->tun_id;
        flower.key.tunnel.ipv4.ipv4_src = tnl->ip_src;
        flower.key.tunnel.ipv4.ipv4_dst = tnl->ip_dst;
        flower.key.tunnel.ipv6.ipv6_src = tnl->ipv6_src;
        flower.key.tunnel.ipv6.ipv6_dst = tnl->ipv6_dst;
        flower.key.tunnel.tos = tnl->ip_tos;
        flower.key.tunnel.ttl = tnl->ip_ttl;
        flower.key.tunnel.tp_src = tnl->tp_src;
        flower.key.tunnel.tp_dst = tnl->tp_dst;
        flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src;
        flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst;
        flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src;
        flower.mask.tunnel.ipv6.ipv6_dst = tnl_mask->ipv6_dst;
        flower.mask.tunnel.tos = tnl_mask->ip_tos;
        flower.mask.tunnel.ttl = tnl_mask->ip_ttl;
        flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY)
                                ? tnl_mask->tun_id : 0;
        flower_match_to_tun_opt(&flower, tnl, tnl_mask);
        flower.tunnel = true;
    }
    memset(&mask->tunnel, 0, sizeof mask->tunnel);

    flower.key.eth_type = key->dl_type;
    flower.mask.eth_type = mask->dl_type;
    if (mask->mpls_lse[0]) {
        flower.key.mpls_lse = key->mpls_lse[0];
        flower.mask.mpls_lse = mask->mpls_lse[0];
        flower.key.encap_eth_type[0] = flower.key.eth_type;
    }
    mask->mpls_lse[0] = 0;

    if (mask->vlans[0].tpid && eth_type_vlan(key->vlans[0].tpid)) {
        flower.key.encap_eth_type[0] = flower.key.eth_type;
        flower.mask.encap_eth_type[0] = flower.mask.eth_type;
        flower.key.eth_type = key->vlans[0].tpid;
        flower.mask.eth_type = mask->vlans[0].tpid;
    }
    if (mask->vlans[0].tci) {
        ovs_be16 vid_mask = mask->vlans[0].tci & htons(VLAN_VID_MASK);
        ovs_be16 pcp_mask = mask->vlans[0].tci & htons(VLAN_PCP_MASK);
        ovs_be16 cfi = mask->vlans[0].tci & htons(VLAN_CFI);

        if (cfi && key->vlans[0].tci & htons(VLAN_CFI)
            && (!vid_mask || vid_mask == htons(VLAN_VID_MASK))
            && (!pcp_mask || pcp_mask == htons(VLAN_PCP_MASK))
            && (vid_mask || pcp_mask)) {
            if (vid_mask) {
                flower.key.vlan_id[0] = vlan_tci_to_vid(key->vlans[0].tci);
                flower.mask.vlan_id[0] = vlan_tci_to_vid(mask->vlans[0].tci);
                VLOG_DBG_RL(&rl, "vlan_id[0]: %d\n", flower.key.vlan_id[0]);
            }
            if (pcp_mask) {
                flower.key.vlan_prio[0] = vlan_tci_to_pcp(key->vlans[0].tci);
                flower.mask.vlan_prio[0] = vlan_tci_to_pcp(mask->vlans[0].tci);
                VLOG_DBG_RL(&rl, "vlan_prio[0]: %d\n",
                            flower.key.vlan_prio[0]);
            }
        } else if (mask->vlans[0].tci == htons(0xffff) &&
                   ntohs(key->vlans[0].tci) == 0) {
            /* exact && no vlan */
        } else {
            /* partial mask */
            return EOPNOTSUPP;
        }
    }

    if (mask->vlans[1].tpid && eth_type_vlan(key->vlans[1].tpid)) {
        flower.key.encap_eth_type[1] = flower.key.encap_eth_type[0];
        flower.mask.encap_eth_type[1] = flower.mask.encap_eth_type[0];
        flower.key.encap_eth_type[0] = key->vlans[1].tpid;
        flower.mask.encap_eth_type[0] = mask->vlans[1].tpid;
    }
    if (mask->vlans[1].tci) {
        ovs_be16 vid_mask = mask->vlans[1].tci & htons(VLAN_VID_MASK);
        ovs_be16 pcp_mask = mask->vlans[1].tci & htons(VLAN_PCP_MASK);
        ovs_be16 cfi = mask->vlans[1].tci & htons(VLAN_CFI);

        if (cfi && key->vlans[1].tci & htons(VLAN_CFI)
            && (!vid_mask || vid_mask == htons(VLAN_VID_MASK))
            && (!pcp_mask || pcp_mask == htons(VLAN_PCP_MASK))
            && (vid_mask || pcp_mask)) {
            if (vid_mask) {
                flower.key.vlan_id[1] = vlan_tci_to_vid(key->vlans[1].tci);
                flower.mask.vlan_id[1] = vlan_tci_to_vid(mask->vlans[1].tci);
                VLOG_DBG_RL(&rl, "vlan_id[1]: %d", flower.key.vlan_id[1]);
            }
            if (pcp_mask) {
                flower.key.vlan_prio[1] = vlan_tci_to_pcp(key->vlans[1].tci);
                flower.mask.vlan_prio[1] = vlan_tci_to_pcp(mask->vlans[1].tci);
                VLOG_DBG_RL(&rl, "vlan_prio[1]: %d", flower.key.vlan_prio[1]);
            }
        } else if (mask->vlans[1].tci == htons(0xffff) &&
                   ntohs(key->vlans[1].tci) == 0) {
            /* exact && no vlan */
        } else {
            /* partial mask */
            return EOPNOTSUPP;
        }
    }
    memset(mask->vlans, 0, sizeof mask->vlans);

    flower.key.dst_mac = key->dl_dst;
    flower.mask.dst_mac = mask->dl_dst;
    flower.key.src_mac = key->dl_src;
    flower.mask.src_mac = mask->dl_src;
    memset(&mask->dl_dst, 0, sizeof mask->dl_dst);
    memset(&mask->dl_src, 0, sizeof mask->dl_src);
    mask->dl_type = 0;
    mask->in_port.odp_port = 0;

    if (key->dl_type == htons(ETH_P_ARP)) {
        flower.key.arp.spa = key->nw_src;
        flower.key.arp.tpa = key->nw_dst;
        flower.key.arp.sha = key->arp_sha;
        flower.key.arp.tha = key->arp_tha;
        flower.key.arp.opcode = key->nw_proto;
        flower.mask.arp.spa = mask->nw_src;
        flower.mask.arp.tpa = mask->nw_dst;
        flower.mask.arp.sha = mask->arp_sha;
        flower.mask.arp.tha = mask->arp_tha;
        flower.mask.arp.opcode = mask->nw_proto;

        mask->nw_src = 0;
        mask->nw_dst = 0;
        mask->nw_proto = 0;
        memset(&mask->arp_sha, 0, sizeof mask->arp_sha);
        memset(&mask->arp_tha, 0, sizeof mask->arp_tha);
    }

    if (is_ip_any(key)) {
        flower.key.ip_proto = key->nw_proto;
        flower.mask.ip_proto = mask->nw_proto;
        mask->nw_proto = 0;
        flower.key.ip_tos = key->nw_tos;
        flower.mask.ip_tos = mask->nw_tos;
        mask->nw_tos = 0;
        flower.key.ip_ttl = key->nw_ttl;
        flower.mask.ip_ttl = mask->nw_ttl;
        mask->nw_ttl = 0;

        if (mask->nw_frag & FLOW_NW_FRAG_ANY) {
            flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT;

            if (key->nw_frag & FLOW_NW_FRAG_ANY) {
                flower.key.flags |= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT;

                if (mask->nw_frag & FLOW_NW_FRAG_LATER) {
                    flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;

                    if (!(key->nw_frag & FLOW_NW_FRAG_LATER)) {
                        flower.key.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
                    }
                }
            }

            mask->nw_frag = 0;
        }

        if (key->nw_proto == IPPROTO_TCP) {
            flower.key.tcp_dst = key->tp_dst;
            flower.mask.tcp_dst = mask->tp_dst;
            flower.key.tcp_src = key->tp_src;
            flower.mask.tcp_src = mask->tp_src;
            flower.key.tcp_flags = key->tcp_flags;
            flower.mask.tcp_flags = mask->tcp_flags;
            mask->tp_src = 0;
            mask->tp_dst = 0;
            mask->tcp_flags = 0;
        } else if (key->nw_proto == IPPROTO_UDP) {
            flower.key.udp_dst = key->tp_dst;
            flower.mask.udp_dst = mask->tp_dst;
            flower.key.udp_src = key->tp_src;
            flower.mask.udp_src = mask->tp_src;
            mask->tp_src = 0;
            mask->tp_dst = 0;
        } else if (key->nw_proto == IPPROTO_SCTP) {
            flower.key.sctp_dst = key->tp_dst;
            flower.mask.sctp_dst = mask->tp_dst;
            flower.key.sctp_src = key->tp_src;
            flower.mask.sctp_src = mask->tp_src;
            mask->tp_src = 0;
            mask->tp_dst = 0;
        }

        if (key->dl_type == htons(ETH_P_IP)) {
            flower.key.ipv4.ipv4_src = key->nw_src;
            flower.mask.ipv4.ipv4_src = mask->nw_src;
            flower.key.ipv4.ipv4_dst = key->nw_dst;
            flower.mask.ipv4.ipv4_dst = mask->nw_dst;
            mask->nw_src = 0;
            mask->nw_dst = 0;
        } else if (key->dl_type == htons(ETH_P_IPV6)) {
            flower.key.ipv6.ipv6_src = key->ipv6_src;
            flower.mask.ipv6.ipv6_src = mask->ipv6_src;
            flower.key.ipv6.ipv6_dst = key->ipv6_dst;
            flower.mask.ipv6.ipv6_dst = mask->ipv6_dst;
            memset(&mask->ipv6_src, 0, sizeof mask->ipv6_src);
            memset(&mask->ipv6_dst, 0, sizeof mask->ipv6_dst);
        }
    }

    if (mask->ct_state) {
        if (mask->ct_state & OVS_CS_F_NEW) {
            if (key->ct_state & OVS_CS_F_NEW) {
                flower.key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_NEW;
            }
            flower.mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_NEW;
        }

        if (mask->ct_state & OVS_CS_F_ESTABLISHED) {
            if (key->ct_state & OVS_CS_F_ESTABLISHED) {
                flower.key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
            }
            flower.mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        }

        if (mask->ct_state & OVS_CS_F_TRACKED) {
            if (key->ct_state & OVS_CS_F_TRACKED) {
                flower.key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
            }
            flower.mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        }

        if (flower.key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) {
            flower.key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW);
            flower.mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW);
        }

        mask->ct_state = 0;
    }

    if (mask->ct_zone) {
        flower.key.ct_zone = key->ct_zone;
        flower.mask.ct_zone = mask->ct_zone;
        mask->ct_zone = 0;
    }

    if (mask->ct_mark) {
        flower.key.ct_mark = key->ct_mark;
        flower.mask.ct_mark = mask->ct_mark;
        mask->ct_mark = 0;
    }

    if (!ovs_u128_is_zero(mask->ct_label)) {
        flower.key.ct_label = key->ct_label;
        flower.mask.ct_label = mask->ct_label;
        mask->ct_label = OVS_U128_ZERO;
    }

    /* ignore exact match on skb_mark of 0. */
    if (mask->pkt_mark == UINT32_MAX && !key->pkt_mark) {
        mask->pkt_mark = 0;
    }

    err = test_key_and_mask(match);
    if (err) {
        return err;
    }

    NL_ATTR_FOR_EACH (nla, left, actions, actions_len) {
        if (flower.action_count >= TCA_ACT_MAX_NUM) {
            VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
            return EOPNOTSUPP;
        }
        action = &flower.actions[flower.action_count];
        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
            odp_port_t port = nl_attr_get_odp_port(nla);
            struct netdev *outdev = netdev_ports_get(port, info->dpif_class);

            if (!outdev) {
                VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
                return ENODEV;
            }
            action->out.ifindex_out = netdev_get_ifindex(outdev);
            action->out.ingress = is_internal_port(netdev_get_type(outdev));
            action->type = TC_ACT_OUTPUT;
            flower.action_count++;
            netdev_close(outdev);
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) {
            const struct ovs_action_push_vlan *vlan_push = nl_attr_get(nla);

            action->vlan.vlan_push_tpid = vlan_push->vlan_tpid;
            action->vlan.vlan_push_id = vlan_tci_to_vid(vlan_push->vlan_tci);
            action->vlan.vlan_push_prio = vlan_tci_to_pcp(vlan_push->vlan_tci);
            action->type = TC_ACT_VLAN_PUSH;
            flower.action_count++;
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) {
            action->type = TC_ACT_VLAN_POP;
            flower.action_count++;
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_MPLS) {
            const struct ovs_action_push_mpls *mpls_push = nl_attr_get(nla);

            action->mpls.proto = mpls_push->mpls_ethertype;
            action->mpls.label = mpls_lse_to_label(mpls_push->mpls_lse);
            action->mpls.tc = mpls_lse_to_tc(mpls_push->mpls_lse);
            action->mpls.ttl = mpls_lse_to_ttl(mpls_push->mpls_lse);
            action->mpls.bos = mpls_lse_to_bos(mpls_push->mpls_lse);
            action->type = TC_ACT_MPLS_PUSH;
            flower.action_count++;
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_MPLS) {
            action->mpls.proto = nl_attr_get_be16(nla);
            action->type = TC_ACT_MPLS_POP;
            flower.action_count++;
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET) {
            const struct nlattr *set = nl_attr_get(nla);
            const size_t set_len = nl_attr_get_size(nla);

            err = parse_put_flow_set_action(&flower, action, set, set_len);
            if (err) {
                return err;
            }
            if (action->type == TC_ACT_ENCAP) {
                action->encap.tp_dst = info->tp_dst_port;
                action->encap.no_csum = !info->tunnel_csum_on;
            }
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
            const struct nlattr *set = nl_attr_get(nla);
            const size_t set_len = nl_attr_get_size(nla);

            err = parse_put_flow_set_masked_action(&flower, action, set,
                                                   set_len, true);
            if (err) {
                return err;
            }
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
            const struct nlattr *ct = nl_attr_get(nla);
            const size_t ct_len = nl_attr_get_size(nla);

            err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
            if (err) {
                return err;
            }
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
            action->type = TC_ACT_CT;
            action->ct.clear = true;
            flower.action_count++;
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_RECIRC) {
            action->type = TC_ACT_GOTO;
            action->chain = nl_attr_get_u32(nla);
            flower.action_count++;
            recirc_act = true;
        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
            action->type = TC_ACT_GOTO;
            action->chain = 0;  /* Chain 0 is reserved and not used by
                                 * recirc, so it is used here to represent
                                 * the drop action. */
            flower.action_count++;
        } else {
            VLOG_DBG_RL(&rl, "unsupported put action type: %d",
                        nl_attr_type(nla));
            return EOPNOTSUPP;
        }
    }

    if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
        VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
        return EOPNOTSUPP;
    }

    if (get_ufid_tc_mapping(ufid, &id) == 0) {
        VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
                    id.handle, id.prio);
        info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid);
    }

    prio = get_prio_for_tc_flower(&flower);
    if (prio == 0) {
        VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
        return ENOSPC;
    }

    flower.act_cookie.data = ufid;
    flower.act_cookie.len = sizeof *ufid;

    block_id = get_block_id_from_netdev(netdev);
    id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
    err = tc_replace_flower(&id, &flower);
    if (!err) {
        if (stats) {
            memset(stats, 0, sizeof *stats);
        }
        add_ufid_tc_mapping(netdev, ufid, &id);
    }

    return err;
}

static int
netdev_tc_flow_get(struct netdev *netdev,
                   struct match *match,
                   struct nlattr **actions,
                   const ovs_u128 *ufid,
                   struct dpif_flow_stats *stats,
                   struct dpif_flow_attrs *attrs,
                   struct ofpbuf *buf)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
    struct tc_flower flower;
    odp_port_t in_port;
    struct tcf_id id;
    int err;

    err = get_ufid_tc_mapping(ufid, &id);
    if (err) {
        return err;
    }

    VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d block_id %d)",
                netdev_get_name(netdev), id.prio, id.handle, id.block_id);

    err = tc_get_flower(&id, &flower);
    if (err) {
        VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s",
                    netdev_get_name(netdev), id.prio, id.handle,
                    ovs_strerror(err));
        return err;
    }

    in_port = netdev_ifindex_to_odp_port(id.ifindex);
    parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf, false);

    match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
    match->flow.in_port.odp_port = in_port;
    match_set_recirc_id(match, id.chain);

    return 0;
}

static int
netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
                   const ovs_u128 *ufid,
                   struct dpif_flow_stats *stats)
{
    struct tc_flower flower;
    struct tcf_id id;
    int error;

    error = get_ufid_tc_mapping(ufid, &id);
    if (error) {
        return error;
    }

    if (stats) {
        memset(stats, 0, sizeof *stats);
        if (!tc_get_flower(&id, &flower)) {
            stats->n_packets = get_32aligned_u64(&flower.stats.n_packets);
            stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes);
            stats->used = flower.lastused;
        }
    }

    error = del_filter_and_ufid_mapping(&id, ufid);

    return error;
}

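/* Probe whether the flower classifier accepts two filters with different
 * masks at the same priority on 'ifindex', and record the result in
 * 'multi_mask_per_prio'. */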
static void
probe_multi_mask_per_prio(int ifindex)
{
    struct tc_flower flower;
    struct tcf_id id1, id2;
    int block_id = 0;
    int prio = 1;
    int error;

    error = tc_add_del_qdisc(ifindex, true, block_id, TC_INGRESS);
    if (error) {
        return;
    }

    memset(&flower, 0, sizeof flower);

    flower.key.eth_type = htons(ETH_P_IP);
    flower.mask.eth_type = OVS_BE16_MAX;
    memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
    memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac);

    id1 = tc_make_tcf_id(ifindex, block_id, prio, TC_INGRESS);
    error = tc_replace_flower(&id1, &flower);
    if (error) {
        goto out;
    }

    memset(&flower.key.src_mac, 0x11, sizeof flower.key.src_mac);
    memset(&flower.mask.src_mac, 0xff, sizeof flower.mask.src_mac);

    id2 = tc_make_tcf_id(ifindex, block_id, prio, TC_INGRESS);
    error = tc_replace_flower(&id2, &flower);
    tc_del_filter(&id1);

    if (error) {
        goto out;
    }

    tc_del_filter(&id2);

    multi_mask_per_prio = true;
    VLOG_INFO("probe tc: multiple masks on single tc prio is supported.");

out:
    tc_add_del_qdisc(ifindex, false, block_id, TC_INGRESS);
}

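/* Probe whether the kernel supports shared tc blocks by attaching an
 * ingress qdisc with an ingress_block id and installing a filter on it,
 * recording the result in 'block_support'. */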
static void
probe_tc_block_support(int ifindex)
{
    struct tc_flower flower;
    uint32_t block_id = 1;
    struct tcf_id id;
    int prio = 0;
    int error;

    error = tc_add_del_qdisc(ifindex, true, block_id, TC_INGRESS);
    if (error) {
        return;
    }

    memset(&flower, 0, sizeof flower);

    flower.key.eth_type = htons(ETH_P_IP);
    flower.mask.eth_type = OVS_BE16_MAX;
    memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
    memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac);

    id = tc_make_tcf_id(ifindex, block_id, prio, TC_INGRESS);
    error = tc_replace_flower(&id, &flower);

    tc_add_del_qdisc(ifindex, false, block_id, TC_INGRESS);

    if (!error) {
        block_support = true;
        VLOG_INFO("probe tc: block offload is supported.");
    }
}

static int
netdev_tc_init_flow_api(struct netdev *netdev)
{
    static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER;
    static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER;
    enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev);
    uint32_t block_id = 0;
    struct tcf_id id;
    int ifindex;
    int error;

    ifindex = netdev_get_ifindex(netdev);
    if (ifindex < 0) {
        VLOG_INFO("init: failed to get ifindex for %s: %s",
                  netdev_get_name(netdev), ovs_strerror(-ifindex));
        return -ifindex;
    }

    block_id = get_block_id_from_netdev(netdev);

    /* An explicit rule flush is needed when we work with an ingress_block,
     * so that, for example, reattaching the block to a bond interface does
     * not fail. */
    id = tc_make_tcf_id(ifindex, block_id, 0, hook);
    tc_del_filter(&id);

    /* Make sure there is no ingress/egress qdisc. */
    tc_add_del_qdisc(ifindex, false, 0, hook);

    if (ovsthread_once_start(&block_once)) {
        probe_tc_block_support(ifindex);
        /* Need to re-fetch block id as it depends on feature availability. */
        block_id = get_block_id_from_netdev(netdev);
        ovsthread_once_done(&block_once);
    }

    if (ovsthread_once_start(&multi_mask_once)) {
        probe_multi_mask_per_prio(ifindex);
        ovsthread_once_done(&multi_mask_once);
    }

    error = tc_add_del_qdisc(ifindex, true, block_id, hook);

    if (error && error != EEXIST) {
        VLOG_INFO("failed adding ingress qdisc required for offloading: %s",
                  ovs_strerror(error));
        return error;
    }

    VLOG_INFO("added ingress qdisc to %s", netdev_get_name(netdev));

    return 0;
}

const struct netdev_flow_api netdev_offload_tc = {
    .type = "linux_tc",
    .flow_flush = netdev_tc_flow_flush,
    .flow_dump_create = netdev_tc_flow_dump_create,
    .flow_dump_destroy = netdev_tc_flow_dump_destroy,
    .flow_dump_next = netdev_tc_flow_dump_next,
    .flow_put = netdev_tc_flow_put,
    .flow_get = netdev_tc_flow_get,
    .flow_del = netdev_tc_flow_del,
    .init_flow_api = netdev_tc_init_flow_api,
};