]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/conntrack.c
datapath: Handle NF_REPEAT in conntrack action.
[mirror_ovs.git] / datapath / conntrack.c
CommitLineData
a94ebc39
JS
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#include <linux/kconfig.h>
15#include <linux/version.h>
16
8063e095 17#if IS_ENABLED(CONFIG_NF_CONNTRACK)
a94ebc39
JS
18
19#include <linux/module.h>
20#include <linux/openvswitch.h>
21#include <net/ip.h>
22#include <net/netfilter/nf_conntrack_core.h>
11251c17 23#include <net/netfilter/nf_conntrack_helper.h>
038e34ab 24#include <net/netfilter/nf_conntrack_labels.h>
a94ebc39
JS
25#include <net/netfilter/nf_conntrack_zones.h>
26#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
27
28#include "datapath.h"
29#include "conntrack.h"
30#include "flow.h"
31#include "flow_netlink.h"
86c2eb45 32#include "gso.h"
a94ebc39
JS
33
34struct ovs_ct_len_tbl {
35 size_t maxlen;
36 size_t minlen;
37};
38
372ce973
JS
39/* Metadata mark for masked write to conntrack mark */
40struct md_mark {
41 u32 value;
42 u32 mask;
43};
44
038e34ab 45/* Metadata label for masked write to conntrack label. */
c05e2094
JS
46struct md_labels {
47 struct ovs_key_ct_labels value;
48 struct ovs_key_ct_labels mask;
038e34ab
JS
49};
50
a94ebc39
JS
51/* Conntrack action context for execution. */
52struct ovs_conntrack_info {
11251c17 53 struct nf_conntrack_helper *helper;
a94ebc39
JS
54 struct nf_conntrack_zone zone;
55 struct nf_conn *ct;
c05e2094 56 u8 commit : 1;
a94ebc39 57 u16 family;
372ce973 58 struct md_mark mark;
c05e2094 59 struct md_labels labels;
a94ebc39
JS
60};
61
11251c17
JS
62static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
63
a94ebc39
JS
64static u16 key_to_nfproto(const struct sw_flow_key *key)
65{
66 switch (ntohs(key->eth.type)) {
67 case ETH_P_IP:
68 return NFPROTO_IPV4;
69 case ETH_P_IPV6:
70 return NFPROTO_IPV6;
71 default:
72 return NFPROTO_UNSPEC;
73 }
74}
75
76/* Map SKB connection state into the values used by flow definition. */
77static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
78{
79 u8 ct_state = OVS_CS_F_TRACKED;
80
81 switch (ctinfo) {
82 case IP_CT_ESTABLISHED_REPLY:
83 case IP_CT_RELATED_REPLY:
a94ebc39
JS
84 ct_state |= OVS_CS_F_REPLY_DIR;
85 break;
86 default:
87 break;
88 }
89
90 switch (ctinfo) {
91 case IP_CT_ESTABLISHED:
92 case IP_CT_ESTABLISHED_REPLY:
93 ct_state |= OVS_CS_F_ESTABLISHED;
94 break;
95 case IP_CT_RELATED:
96 case IP_CT_RELATED_REPLY:
97 ct_state |= OVS_CS_F_RELATED;
98 break;
99 case IP_CT_NEW:
a94ebc39
JS
100 ct_state |= OVS_CS_F_NEW;
101 break;
102 default:
103 break;
104 }
105
106 return ct_state;
107}
108
c05e2094
JS
109static u32 ovs_ct_get_mark(const struct nf_conn *ct)
110{
111#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
112 return ct ? ct->mark : 0;
113#else
114 return 0;
115#endif
116}
117
118static void ovs_ct_get_labels(const struct nf_conn *ct,
119 struct ovs_key_ct_labels *labels)
038e34ab
JS
120{
121 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
122
123 if (cl) {
124 size_t len = cl->words * sizeof(long);
125
c05e2094
JS
126 if (len > OVS_CT_LABELS_LEN)
127 len = OVS_CT_LABELS_LEN;
128 else if (len < OVS_CT_LABELS_LEN)
129 memset(labels, 0, OVS_CT_LABELS_LEN);
130 memcpy(labels, cl->bits, len);
038e34ab 131 } else {
c05e2094 132 memset(labels, 0, OVS_CT_LABELS_LEN);
038e34ab
JS
133 }
134}
135
a94ebc39 136static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
372ce973
JS
137 const struct nf_conntrack_zone *zone,
138 const struct nf_conn *ct)
a94ebc39
JS
139{
140 key->ct.state = state;
141 key->ct.zone = zone->id;
c05e2094
JS
142 key->ct.mark = ovs_ct_get_mark(ct);
143 ovs_ct_get_labels(ct, &key->ct.labels);
a94ebc39
JS
144}
145
146/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
147 * previously sent the packet to conntrack via the ct action.
148 */
149static void ovs_ct_update_key(const struct sk_buff *skb,
f23593a1 150 const struct ovs_conntrack_info *info,
a94ebc39
JS
151 struct sw_flow_key *key, bool post_ct)
152{
153 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
154 enum ip_conntrack_info ctinfo;
155 struct nf_conn *ct;
156 u8 state = 0;
157
158 ct = nf_ct_get(skb, &ctinfo);
159 if (ct) {
160 state = ovs_ct_get_state(ctinfo);
b0f251cd 161 /* All unconfirmed entries are NEW connections. */
c05e2094
JS
162 if (!nf_ct_is_confirmed(ct))
163 state |= OVS_CS_F_NEW;
b0f251cd
JR
164 /* OVS persists the related flag for the duration of the
165 * connection.
166 */
a94ebc39
JS
167 if (ct->master)
168 state |= OVS_CS_F_RELATED;
169 zone = nf_ct_zone(ct);
170 } else if (post_ct) {
171 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
f23593a1
JS
172 if (info)
173 zone = &info->zone;
a94ebc39 174 }
372ce973 175 __ovs_ct_update_key(key, state, zone, ct);
a94ebc39
JS
176}
177
b0f251cd
JR
178/* This is called to initialize CT key fields possibly coming in from the local
179 * stack.
180 */
a94ebc39
JS
181void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
182{
f23593a1 183 ovs_ct_update_key(skb, NULL, key, false);
a94ebc39
JS
184}
185
186int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
187{
c05e2094 188 if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
a94ebc39
JS
189 return -EMSGSIZE;
190
191 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
192 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
193 return -EMSGSIZE;
194
372ce973
JS
195 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
196 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
197 return -EMSGSIZE;
198
c05e2094
JS
199 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
200 nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
201 &key->ct.labels))
038e34ab
JS
202 return -EMSGSIZE;
203
372ce973
JS
204 return 0;
205}
206
207static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
208 u32 ct_mark, u32 mask)
209{
c05e2094 210#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
372ce973
JS
211 enum ip_conntrack_info ctinfo;
212 struct nf_conn *ct;
213 u32 new_mark;
214
372ce973
JS
215 /* The connection could be invalid, in which case set_mark is no-op. */
216 ct = nf_ct_get(skb, &ctinfo);
217 if (!ct)
218 return 0;
219
220 new_mark = ct_mark | (ct->mark & ~(mask));
221 if (ct->mark != new_mark) {
222 ct->mark = new_mark;
223 nf_conntrack_event_cache(IPCT_MARK, ct);
224 key->ct.mark = new_mark;
225 }
226
a94ebc39 227 return 0;
c05e2094
JS
228#else
229 return -ENOTSUPP;
230#endif
a94ebc39
JS
231}
232
c05e2094
JS
233static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
234 const struct ovs_key_ct_labels *labels,
235 const struct ovs_key_ct_labels *mask)
038e34ab
JS
236{
237 enum ip_conntrack_info ctinfo;
238 struct nf_conn_labels *cl;
239 struct nf_conn *ct;
240 int err;
241
038e34ab
JS
242 /* The connection could be invalid, in which case set_label is no-op.*/
243 ct = nf_ct_get(skb, &ctinfo);
244 if (!ct)
245 return 0;
246
247 cl = nf_ct_labels_find(ct);
248 if (!cl) {
249 nf_ct_labels_ext_add(ct);
250 cl = nf_ct_labels_find(ct);
251 }
c05e2094 252 if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN)
038e34ab
JS
253 return -ENOSPC;
254
c05e2094
JS
255 err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
256 OVS_CT_LABELS_LEN / sizeof(u32));
038e34ab
JS
257 if (err)
258 return err;
259
c05e2094 260 ovs_ct_get_labels(ct, &key->ct.labels);
038e34ab
JS
261 return 0;
262}
263
11251c17
JS
264/* 'skb' should already be pulled to nh_ofs. */
265static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
266{
267 const struct nf_conntrack_helper *helper;
268 const struct nf_conn_help *help;
269 enum ip_conntrack_info ctinfo;
270 unsigned int protoff;
271 struct nf_conn *ct;
272
273 ct = nf_ct_get(skb, &ctinfo);
274 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
275 return NF_ACCEPT;
276
277 help = nfct_help(ct);
278 if (!help)
279 return NF_ACCEPT;
280
281 helper = rcu_dereference(help->helper);
282 if (!helper)
283 return NF_ACCEPT;
284
285 switch (proto) {
286 case NFPROTO_IPV4:
287 protoff = ip_hdrlen(skb);
288 break;
289 case NFPROTO_IPV6: {
290 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
291 __be16 frag_off;
c05e2094 292 int ofs;
11251c17 293
c05e2094
JS
294 ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
295 &frag_off);
296 if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
11251c17
JS
297 pr_debug("proto header not found\n");
298 return NF_ACCEPT;
299 }
c05e2094 300 protoff = ofs;
11251c17
JS
301 break;
302 }
303 default:
304 WARN_ONCE(1, "helper invoked on non-IP family!");
305 return NF_DROP;
306 }
307
308 return helper->help(skb, protoff, ct, ctinfo);
309}
310
c05e2094
JS
311/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
312 * value if 'skb' is freed.
313 */
a94ebc39
JS
314static int handle_fragments(struct net *net, struct sw_flow_key *key,
315 u16 zone, struct sk_buff *skb)
316{
86c2eb45 317 struct ovs_gso_cb ovs_cb = *OVS_GSO_CB(skb);
2e602ea3 318 int err;
a94ebc39
JS
319
320 if (!skb->dev) {
321 OVS_NLERR(true, "%s: skb has no dev; dropping", __func__);
322 return -EINVAL;
323 }
324
325 if (key->eth.type == htons(ETH_P_IP)) {
326 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
a94ebc39
JS
327
328 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
39c0ff22 329 err = ip_defrag(net, skb, user);
a94ebc39
JS
330 if (err)
331 return err;
332
86c2eb45 333 ovs_cb.dp_cb.mru = IPCB(skb)->frag_max_size;
a94ebc39 334#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
c05e2094 335 } else if (key->eth.type == htons(ETH_P_IPV6)) {
a94ebc39 336 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
a94ebc39 337
66ec6da8 338 skb_orphan(skb);
a94ebc39 339 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
2e602ea3
JS
340 err = nf_ct_frag6_gather(net, skb, user);
341 if (err)
342 return err;
a94ebc39 343
2e602ea3 344 key->ip.proto = ipv6_hdr(skb)->nexthdr;
86c2eb45 345 ovs_cb.dp_cb.mru = IP6CB(skb)->frag_max_size;
a94ebc39
JS
346#endif /* IP frag support */
347 } else {
c05e2094 348 kfree_skb(skb);
a94ebc39
JS
349 return -EPFNOSUPPORT;
350 }
351
352 key->ip.frag = OVS_FRAG_TYPE_NONE;
353 skb_clear_hash(skb);
354 skb->ignore_df = 1;
86c2eb45 355 *OVS_GSO_CB(skb) = ovs_cb;
a94ebc39
JS
356
357 return 0;
358}
359
360static struct nf_conntrack_expect *
361ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
362 u16 proto, const struct sk_buff *skb)
363{
364 struct nf_conntrack_tuple tuple;
365
fa67f8e0 366 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
a94ebc39
JS
367 return NULL;
368 return __nf_ct_expect_find(net, zone, &tuple);
369}
370
3dd9e118
JR
371/* This replicates logic from nf_conntrack_core.c that is not exported. */
372static enum ip_conntrack_info
373ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
374{
375 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
376
377 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
378 return IP_CT_ESTABLISHED_REPLY;
379 /* Once we've had two way comms, always ESTABLISHED. */
380 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
381 return IP_CT_ESTABLISHED;
382 if (test_bit(IPS_EXPECTED_BIT, &ct->status))
383 return IP_CT_RELATED;
384 return IP_CT_NEW;
385}
386
387/* Find an existing connection which this packet belongs to without
388 * re-attributing statistics or modifying the connection state. This allows an
389 * skb->nfct lost due to an upcall to be recovered during actions execution.
390 *
391 * Must be called with rcu_read_lock.
392 *
393 * On success, populates skb->nfct and skb->nfctinfo, and returns the
394 * connection. Returns NULL if there is no existing entry.
395 */
396static struct nf_conn *
397ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
398 u8 l3num, struct sk_buff *skb)
399{
400 struct nf_conntrack_l3proto *l3proto;
401 struct nf_conntrack_l4proto *l4proto;
402 struct nf_conntrack_tuple tuple;
403 struct nf_conntrack_tuple_hash *h;
404 enum ip_conntrack_info ctinfo;
405 struct nf_conn *ct;
406 unsigned int dataoff;
407 u8 protonum;
408
409 l3proto = __nf_ct_l3proto_find(l3num);
410 if (!l3proto) {
411 pr_debug("ovs_ct_find_existing: Can't get l3proto\n");
412 return NULL;
413 }
414 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
415 &protonum) <= 0) {
416 pr_debug("ovs_ct_find_existing: Can't get protonum\n");
417 return NULL;
418 }
419 l4proto = __nf_ct_l4proto_find(l3num, protonum);
420 if (!l4proto) {
421 pr_debug("ovs_ct_find_existing: Can't get l4proto\n");
422 return NULL;
423 }
424 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
425 protonum, net, &tuple, l3proto, l4proto)) {
426 pr_debug("ovs_ct_find_existing: Can't get tuple\n");
427 return NULL;
428 }
429
430 /* look for tuple match */
431 h = nf_conntrack_find_get(net, zone, &tuple);
432 if (!h)
433 return NULL; /* Not found. */
434
435 ct = nf_ct_tuplehash_to_ctrack(h);
436
437 ctinfo = ovs_ct_get_info(h);
438 if (ctinfo == IP_CT_NEW) {
439 /* This should not happen. */
440 WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
441 }
442 skb->nfct = &ct->ct_general;
443 skb->nfctinfo = ctinfo;
444 return ct;
445}
446
a94ebc39 447/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
3dd9e118
JR
448static bool skb_nfct_cached(struct net *net,
449 const struct sw_flow_key *key,
450 const struct ovs_conntrack_info *info,
451 struct sk_buff *skb)
a94ebc39
JS
452{
453 enum ip_conntrack_info ctinfo;
454 struct nf_conn *ct;
455
456 ct = nf_ct_get(skb, &ctinfo);
3dd9e118
JR
457 /* If no ct, check if we have evidence that an existing conntrack entry
458 * might be found for this skb. This happens when we lose a skb->nfct
459 * due to an upcall. If the connection was not confirmed, it is not
460 * cached and needs to be run through conntrack again.
461 */
462 if (!ct && key->ct.state & OVS_CS_F_TRACKED &&
463 !(key->ct.state & OVS_CS_F_INVALID) &&
464 key->ct.zone == info->zone.id)
465 ct = ovs_ct_find_existing(net, &info->zone, info->family, skb);
a94ebc39
JS
466 if (!ct)
467 return false;
468 if (!net_eq(net, read_pnet(&ct->ct_net)))
469 return false;
470 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
471 return false;
11251c17
JS
472 if (info->helper) {
473 struct nf_conn_help *help;
474
475 help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
476 if (help && rcu_access_pointer(help->helper) != info->helper)
477 return false;
478 }
a94ebc39
JS
479
480 return true;
481}
482
b0f251cd 483/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
a04a5794
JR
484 * not done already. Update key with new CT state after passing the packet
485 * through conntrack.
b0f251cd
JR
486 * Note that if the packet is deemed invalid by conntrack, skb->nfct will be
487 * set to NULL and 0 will be returned.
488 */
c05e2094 489static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
a94ebc39
JS
490 const struct ovs_conntrack_info *info,
491 struct sk_buff *skb)
492{
493 /* If we are recirculating packets to match on conntrack fields and
494 * committing with a separate conntrack action, then we don't need to
495 * actually run the packet through conntrack twice unless it's for a
496 * different zone.
497 */
3dd9e118 498 if (!skb_nfct_cached(net, key, info, skb)) {
a94ebc39 499 struct nf_conn *tmpl = info->ct;
9bf67b92 500 int err;
a94ebc39
JS
501
502 /* Associate skb with specified zone. */
503 if (tmpl) {
504 if (skb->nfct)
505 nf_conntrack_put(skb->nfct);
506 nf_conntrack_get(&tmpl->ct_general);
507 skb->nfct = &tmpl->ct_general;
508 skb->nfctinfo = IP_CT_NEW;
509 }
510
9bf67b92
JR
511 /* Repeat if requested, see nf_iterate(). */
512 do {
513 err = nf_conntrack_in(net, info->family,
514 NF_INET_FORWARD, skb);
515 } while (err == NF_REPEAT);
516
517 if (err != NF_ACCEPT)
a94ebc39 518 return -ENOENT;
11251c17 519
a04a5794
JR
520 ovs_ct_update_key(skb, info, key, true);
521
11251c17
JS
522 if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
523 WARN_ONCE(1, "helper rejected packet");
524 return -EINVAL;
525 }
a94ebc39
JS
526 }
527
528 return 0;
529}
530
531/* Lookup connection and read fields into key. */
532static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
533 const struct ovs_conntrack_info *info,
534 struct sk_buff *skb)
535{
536 struct nf_conntrack_expect *exp;
537
b0f251cd
JR
538 /* If we pass an expected packet through nf_conntrack_in() the
539 * expectation is typically removed, but the packet could still be
540 * lost in upcall processing. To prevent this from happening we
541 * perform an explicit expectation lookup. Expected connections are
542 * always new, and will be passed through conntrack only when they are
543 * committed, as it is OK to remove the expectation at that time.
544 */
a94ebc39
JS
545 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
546 if (exp) {
547 u8 state;
548
549 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
372ce973 550 __ovs_ct_update_key(key, state, &info->zone, exp->master);
a94ebc39
JS
551 } else {
552 int err;
553
554 err = __ovs_ct_lookup(net, key, info, skb);
555 if (err)
556 return err;
a94ebc39
JS
557 }
558
559 return 0;
560}
561
562/* Lookup connection and confirm if unconfirmed. */
563static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
564 const struct ovs_conntrack_info *info,
565 struct sk_buff *skb)
566{
a94ebc39
JS
567 int err;
568
a94ebc39
JS
569 err = __ovs_ct_lookup(net, key, info, skb);
570 if (err)
571 return err;
b0f251cd 572 /* This is a no-op if the connection has already been confirmed. */
a94ebc39
JS
573 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
574 return -EINVAL;
575
a94ebc39
JS
576 return 0;
577}
578
c05e2094 579static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
038e34ab
JS
580{
581 size_t i;
582
c05e2094
JS
583 for (i = 0; i < sizeof(*labels); i++)
584 if (labels->ct_labels[i])
038e34ab
JS
585 return true;
586
587 return false;
588}
589
c05e2094
JS
590/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
591 * value if 'skb' is freed.
592 */
a94ebc39
JS
593int ovs_ct_execute(struct net *net, struct sk_buff *skb,
594 struct sw_flow_key *key,
595 const struct ovs_conntrack_info *info)
596{
597 int nh_ofs;
598 int err;
599
600 /* The conntrack module expects to be working at L3. */
601 nh_ofs = skb_network_offset(skb);
602 skb_pull(skb, nh_ofs);
603
604 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
605 err = handle_fragments(net, key, info->zone.id, skb);
606 if (err)
607 return err;
608 }
609
c05e2094 610 if (info->commit)
a94ebc39
JS
611 err = ovs_ct_commit(net, key, info, skb);
612 else
613 err = ovs_ct_lookup(net, key, info, skb);
372ce973
JS
614 if (err)
615 goto err;
a94ebc39 616
038e34ab 617 if (info->mark.mask) {
372ce973
JS
618 err = ovs_ct_set_mark(skb, key, info->mark.value,
619 info->mark.mask);
038e34ab
JS
620 if (err)
621 goto err;
622 }
c05e2094
JS
623 if (labels_nonzero(&info->labels.mask))
624 err = ovs_ct_set_labels(skb, key, &info->labels.value,
625 &info->labels.mask);
372ce973 626err:
a94ebc39 627 skb_push(skb, nh_ofs);
c05e2094
JS
628 if (err)
629 kfree_skb(skb);
a94ebc39
JS
630 return err;
631}
632
11251c17
JS
633static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
634 const struct sw_flow_key *key, bool log)
635{
636 struct nf_conntrack_helper *helper;
637 struct nf_conn_help *help;
638
639 helper = nf_conntrack_helper_try_module_get(name, info->family,
640 key->ip.proto);
641 if (!helper) {
642 OVS_NLERR(log, "Unknown helper \"%s\"", name);
643 return -EINVAL;
644 }
645
646 help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
647 if (!help) {
648 module_put(helper->me);
649 return -ENOMEM;
650 }
651
652 rcu_assign_pointer(help->helper, helper);
653 info->helper = helper;
654 return 0;
655}
656
a94ebc39 657static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
c05e2094 658 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
a94ebc39
JS
659 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
660 .maxlen = sizeof(u16) },
372ce973
JS
661 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
662 .maxlen = sizeof(struct md_mark) },
c05e2094
JS
663 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
664 .maxlen = sizeof(struct md_labels) },
11251c17
JS
665 [OVS_CT_ATTR_HELPER] = { .minlen = 1,
666 .maxlen = NF_CT_HELPER_NAME_LEN }
a94ebc39
JS
667};
668
669static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
11251c17 670 const char **helper, bool log)
a94ebc39
JS
671{
672 struct nlattr *a;
673 int rem;
674
675 nla_for_each_nested(a, attr, rem) {
676 int type = nla_type(a);
677 int maxlen = ovs_ct_attr_lens[type].maxlen;
678 int minlen = ovs_ct_attr_lens[type].minlen;
679
680 if (type > OVS_CT_ATTR_MAX) {
681 OVS_NLERR(log,
682 "Unknown conntrack attr (type=%d, max=%d)",
683 type, OVS_CT_ATTR_MAX);
684 return -EINVAL;
685 }
686 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
687 OVS_NLERR(log,
688 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
689 type, nla_len(a), maxlen);
690 return -EINVAL;
691 }
692
693 switch (type) {
c05e2094
JS
694 case OVS_CT_ATTR_COMMIT:
695 info->commit = true;
a94ebc39
JS
696 break;
697#ifdef CONFIG_NF_CONNTRACK_ZONES
698 case OVS_CT_ATTR_ZONE:
699 info->zone.id = nla_get_u16(a);
700 break;
372ce973
JS
701#endif
702#ifdef CONFIG_NF_CONNTRACK_MARK
703 case OVS_CT_ATTR_MARK: {
704 struct md_mark *mark = nla_data(a);
705
c05e2094
JS
706 if (!mark->mask) {
707 OVS_NLERR(log, "ct_mark mask cannot be 0");
708 return -EINVAL;
709 }
372ce973
JS
710 info->mark = *mark;
711 break;
712 }
038e34ab
JS
713#endif
714#ifdef CONFIG_NF_CONNTRACK_LABELS
c05e2094
JS
715 case OVS_CT_ATTR_LABELS: {
716 struct md_labels *labels = nla_data(a);
038e34ab 717
c05e2094
JS
718 if (!labels_nonzero(&labels->mask)) {
719 OVS_NLERR(log, "ct_labels mask cannot be 0");
720 return -EINVAL;
721 }
722 info->labels = *labels;
038e34ab
JS
723 break;
724 }
a94ebc39 725#endif
11251c17
JS
726 case OVS_CT_ATTR_HELPER:
727 *helper = nla_data(a);
728 if (!memchr(*helper, '\0', nla_len(a))) {
729 OVS_NLERR(log, "Invalid conntrack helper");
730 return -EINVAL;
731 }
732 break;
a94ebc39
JS
733 default:
734 OVS_NLERR(log, "Unknown conntrack attr (%d)",
735 type);
736 return -EINVAL;
737 }
738 }
739
740 if (rem > 0) {
741 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
742 return -EINVAL;
743 }
744
745 return 0;
746}
747
038e34ab 748bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
a94ebc39
JS
749{
750 if (attr == OVS_KEY_ATTR_CT_STATE)
751 return true;
752 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
753 attr == OVS_KEY_ATTR_CT_ZONE)
754 return true;
372ce973
JS
755 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
756 attr == OVS_KEY_ATTR_CT_MARK)
757 return true;
038e34ab 758 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
c05e2094 759 attr == OVS_KEY_ATTR_CT_LABELS) {
038e34ab
JS
760 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
761
762 return ovs_net->xt_label;
763 }
a94ebc39
JS
764
765 return false;
766}
767
768int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
769 const struct sw_flow_key *key,
770 struct sw_flow_actions **sfa, bool log)
771{
772 struct ovs_conntrack_info ct_info;
11251c17 773 const char *helper = NULL;
a94ebc39
JS
774 u16 family;
775 int err;
776
777 family = key_to_nfproto(key);
778 if (family == NFPROTO_UNSPEC) {
779 OVS_NLERR(log, "ct family unspecified");
780 return -EINVAL;
781 }
782
783 memset(&ct_info, 0, sizeof(ct_info));
784 ct_info.family = family;
785
786 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
787 NF_CT_DEFAULT_ZONE_DIR, 0);
788
11251c17 789 err = parse_ct(attr, &ct_info, &helper, log);
a94ebc39
JS
790 if (err)
791 return err;
792
793 /* Set up template for tracking connections in specific zones. */
794 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
795 if (!ct_info.ct) {
796 OVS_NLERR(log, "Failed to allocate conntrack template");
797 return -ENOMEM;
798 }
a3a68d63
JS
799
800 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
801 nf_conntrack_get(&ct_info.ct->ct_general);
802
11251c17
JS
803 if (helper) {
804 err = ovs_ct_add_helper(&ct_info, helper, key, log);
805 if (err)
806 goto err_free_ct;
807 }
a94ebc39
JS
808
809 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
810 sizeof(ct_info), log);
811 if (err)
812 goto err_free_ct;
813
a94ebc39
JS
814 return 0;
815err_free_ct:
11251c17 816 __ovs_ct_free_action(&ct_info);
a94ebc39
JS
817 return err;
818}
819
820int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
821 struct sk_buff *skb)
822{
823 struct nlattr *start;
824
825 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
826 if (!start)
827 return -EMSGSIZE;
828
c05e2094 829 if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))
a94ebc39
JS
830 return -EMSGSIZE;
831 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
832 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
833 return -EMSGSIZE;
c05e2094 834 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
372ce973
JS
835 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
836 &ct_info->mark))
837 return -EMSGSIZE;
038e34ab 838 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
c05e2094
JS
839 labels_nonzero(&ct_info->labels.mask) &&
840 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
841 &ct_info->labels))
038e34ab 842 return -EMSGSIZE;
11251c17
JS
843 if (ct_info->helper) {
844 if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
845 ct_info->helper->name))
846 return -EMSGSIZE;
847 }
a94ebc39
JS
848
849 nla_nest_end(skb, start);
850
851 return 0;
852}
853
/* Release the resources held by the ovs_conntrack_info stored as the payload
 * of action attribute 'a'.
 */
void ovs_ct_free_action(const struct nlattr *a)
{
	__ovs_ct_free_action(nla_data(a));
}
860
861static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
862{
863 if (ct_info->helper)
864 module_put(ct_info->helper->me);
a94ebc39
JS
865 if (ct_info->ct)
866 nf_ct_tmpl_free(ct_info->ct);
867}
868
038e34ab
JS
869void ovs_ct_init(struct net *net)
870{
c05e2094 871 unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
038e34ab
JS
872 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
873
874 if (nf_connlabels_get(net, n_bits)) {
875 ovs_net->xt_label = false;
876 OVS_NLERR(true, "Failed to set connlabel length");
877 } else {
878 ovs_net->xt_label = true;
879 }
880}
881
882void ovs_ct_exit(struct net *net)
883{
884 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
885
886 if (ovs_net->xt_label)
887 nf_connlabels_put(net);
888}
889
8063e095 890#endif /* CONFIG_NF_CONNTRACK */