/*
 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
27 enum nft_ct_keys key
:8;
28 enum ip_conntrack_dir dir
:8;
30 enum nft_registers dreg
:8;
31 enum nft_registers sreg
:8;
#ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * One conntrack template pointer per CPU, and a counter that is incremented
 * when a zone-setting expression is initialised and decremented when one is
 * destroyed; the templates are released when it drops to zero.
 */
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
#endif
40 static u64
nft_ct_get_eval_counter(const struct nf_conn_counter
*c
,
42 enum ip_conntrack_dir d
)
44 if (d
< IP_CT_DIR_MAX
)
45 return k
== NFT_CT_BYTES
? atomic64_read(&c
[d
].bytes
) :
46 atomic64_read(&c
[d
].packets
);
48 return nft_ct_get_eval_counter(c
, k
, IP_CT_DIR_ORIGINAL
) +
49 nft_ct_get_eval_counter(c
, k
, IP_CT_DIR_REPLY
);
52 static void nft_ct_get_eval(const struct nft_expr
*expr
,
53 struct nft_regs
*regs
,
54 const struct nft_pktinfo
*pkt
)
56 const struct nft_ct
*priv
= nft_expr_priv(expr
);
57 u32
*dest
= &regs
->data
[priv
->dreg
];
58 enum ip_conntrack_info ctinfo
;
59 const struct nf_conn
*ct
;
60 const struct nf_conn_help
*help
;
61 const struct nf_conntrack_tuple
*tuple
;
62 const struct nf_conntrack_helper
*helper
;
65 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
70 state
= NF_CT_STATE_INVALID_BIT
;
71 else if (nf_ct_is_untracked(ct
))
72 state
= NF_CT_STATE_UNTRACKED_BIT
;
74 state
= NF_CT_STATE_BIT(ctinfo
);
85 case NFT_CT_DIRECTION
:
86 *dest
= CTINFO2DIR(ctinfo
);
91 #ifdef CONFIG_NF_CONNTRACK_MARK
96 #ifdef CONFIG_NF_CONNTRACK_SECMARK
101 case NFT_CT_EXPIRATION
:
102 *dest
= jiffies_to_msecs(nf_ct_expires(ct
));
105 if (ct
->master
== NULL
)
107 help
= nfct_help(ct
->master
);
110 helper
= rcu_dereference(help
->helper
);
113 strncpy((char *)dest
, helper
->name
, NF_CT_HELPER_NAME_LEN
);
115 #ifdef CONFIG_NF_CONNTRACK_LABELS
116 case NFT_CT_LABELS
: {
117 struct nf_conn_labels
*labels
= nf_ct_labels_find(ct
);
120 memcpy(dest
, labels
->bits
, NF_CT_LABELS_MAX_SIZE
);
122 memset(dest
, 0, NF_CT_LABELS_MAX_SIZE
);
126 case NFT_CT_BYTES
: /* fallthrough */
128 const struct nf_conn_acct
*acct
= nf_conn_acct_find(ct
);
132 count
= nft_ct_get_eval_counter(acct
->counter
,
133 priv
->key
, priv
->dir
);
134 memcpy(dest
, &count
, sizeof(count
));
137 case NFT_CT_AVGPKT
: {
138 const struct nf_conn_acct
*acct
= nf_conn_acct_find(ct
);
139 u64 avgcnt
= 0, bcnt
= 0, pcnt
= 0;
142 pcnt
= nft_ct_get_eval_counter(acct
->counter
,
143 NFT_CT_PKTS
, priv
->dir
);
144 bcnt
= nft_ct_get_eval_counter(acct
->counter
,
145 NFT_CT_BYTES
, priv
->dir
);
147 avgcnt
= div64_u64(bcnt
, pcnt
);
150 memcpy(dest
, &avgcnt
, sizeof(avgcnt
));
153 case NFT_CT_L3PROTOCOL
:
154 *dest
= nf_ct_l3num(ct
);
156 case NFT_CT_PROTOCOL
:
157 *dest
= nf_ct_protonum(ct
);
159 #ifdef CONFIG_NF_CONNTRACK_ZONES
161 const struct nf_conntrack_zone
*zone
= nf_ct_zone(ct
);
163 if (priv
->dir
< IP_CT_DIR_MAX
)
164 *dest
= nf_ct_zone_id(zone
, priv
->dir
);
175 tuple
= &ct
->tuplehash
[priv
->dir
].tuple
;
178 memcpy(dest
, tuple
->src
.u3
.all
,
179 nf_ct_l3num(ct
) == NFPROTO_IPV4
? 4 : 16);
182 memcpy(dest
, tuple
->dst
.u3
.all
,
183 nf_ct_l3num(ct
) == NFPROTO_IPV4
? 4 : 16);
185 case NFT_CT_PROTO_SRC
:
186 *dest
= (__force __u16
)tuple
->src
.u
.all
;
188 case NFT_CT_PROTO_DST
:
189 *dest
= (__force __u16
)tuple
->dst
.u
.all
;
196 regs
->verdict
.code
= NFT_BREAK
;
199 #ifdef CONFIG_NF_CONNTRACK_ZONES
200 static void nft_ct_set_zone_eval(const struct nft_expr
*expr
,
201 struct nft_regs
*regs
,
202 const struct nft_pktinfo
*pkt
)
204 struct nf_conntrack_zone zone
= { .dir
= NF_CT_DEFAULT_ZONE_DIR
};
205 const struct nft_ct
*priv
= nft_expr_priv(expr
);
206 struct sk_buff
*skb
= pkt
->skb
;
207 enum ip_conntrack_info ctinfo
;
208 u16 value
= regs
->data
[priv
->sreg
];
211 ct
= nf_ct_get(skb
, &ctinfo
);
212 if (ct
) /* already tracked */
218 case IP_CT_DIR_ORIGINAL
:
219 zone
.dir
= NF_CT_ZONE_DIR_ORIG
;
221 case IP_CT_DIR_REPLY
:
222 zone
.dir
= NF_CT_ZONE_DIR_REPL
;
228 ct
= this_cpu_read(nft_ct_pcpu_template
);
230 if (likely(atomic_read(&ct
->ct_general
.use
) == 1)) {
231 nf_ct_zone_add(ct
, &zone
);
233 /* previous skb got queued to userspace */
234 ct
= nf_ct_tmpl_alloc(nft_net(pkt
), &zone
, GFP_ATOMIC
);
236 regs
->verdict
.code
= NF_DROP
;
241 atomic_inc(&ct
->ct_general
.use
);
242 nf_ct_set(skb
, ct
, IP_CT_NEW
);
246 static void nft_ct_set_eval(const struct nft_expr
*expr
,
247 struct nft_regs
*regs
,
248 const struct nft_pktinfo
*pkt
)
250 const struct nft_ct
*priv
= nft_expr_priv(expr
);
251 struct sk_buff
*skb
= pkt
->skb
;
252 #ifdef CONFIG_NF_CONNTRACK_MARK
253 u32 value
= regs
->data
[priv
->sreg
];
255 enum ip_conntrack_info ctinfo
;
258 ct
= nf_ct_get(skb
, &ctinfo
);
263 #ifdef CONFIG_NF_CONNTRACK_MARK
265 if (ct
->mark
!= value
) {
267 nf_conntrack_event_cache(IPCT_MARK
, ct
);
271 #ifdef CONFIG_NF_CONNTRACK_LABELS
273 nf_connlabels_replace(ct
,
274 &regs
->data
[priv
->sreg
],
275 &regs
->data
[priv
->sreg
],
276 NF_CT_LABELS_MAX_SIZE
/ sizeof(u32
));
284 static const struct nla_policy nft_ct_policy
[NFTA_CT_MAX
+ 1] = {
285 [NFTA_CT_DREG
] = { .type
= NLA_U32
},
286 [NFTA_CT_KEY
] = { .type
= NLA_U32
},
287 [NFTA_CT_DIRECTION
] = { .type
= NLA_U8
},
288 [NFTA_CT_SREG
] = { .type
= NLA_U32
},
291 static int nft_ct_netns_get(struct net
*net
, uint8_t family
)
295 if (family
== NFPROTO_INET
) {
296 err
= nf_ct_netns_get(net
, NFPROTO_IPV4
);
299 err
= nf_ct_netns_get(net
, NFPROTO_IPV6
);
303 err
= nf_ct_netns_get(net
, family
);
310 nf_ct_netns_put(net
, NFPROTO_IPV4
);
315 static void nft_ct_netns_put(struct net
*net
, uint8_t family
)
317 if (family
== NFPROTO_INET
) {
318 nf_ct_netns_put(net
, NFPROTO_IPV4
);
319 nf_ct_netns_put(net
, NFPROTO_IPV6
);
321 nf_ct_netns_put(net
, family
);
324 #ifdef CONFIG_NF_CONNTRACK_ZONES
325 static void nft_ct_tmpl_put_pcpu(void)
330 for_each_possible_cpu(cpu
) {
331 ct
= per_cpu(nft_ct_pcpu_template
, cpu
);
335 per_cpu(nft_ct_pcpu_template
, cpu
) = NULL
;
339 static bool nft_ct_tmpl_alloc_pcpu(void)
341 struct nf_conntrack_zone zone
= { .id
= 0 };
345 if (nft_ct_pcpu_template_refcnt
)
348 for_each_possible_cpu(cpu
) {
349 tmp
= nf_ct_tmpl_alloc(&init_net
, &zone
, GFP_KERNEL
);
351 nft_ct_tmpl_put_pcpu();
355 atomic_set(&tmp
->ct_general
.use
, 1);
356 per_cpu(nft_ct_pcpu_template
, cpu
) = tmp
;
363 static int nft_ct_get_init(const struct nft_ctx
*ctx
,
364 const struct nft_expr
*expr
,
365 const struct nlattr
* const tb
[])
367 struct nft_ct
*priv
= nft_expr_priv(expr
);
371 priv
->key
= ntohl(nla_get_be32(tb
[NFTA_CT_KEY
]));
372 priv
->dir
= IP_CT_DIR_MAX
;
374 case NFT_CT_DIRECTION
:
375 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
381 #ifdef CONFIG_NF_CONNTRACK_MARK
384 #ifdef CONFIG_NF_CONNTRACK_SECMARK
387 case NFT_CT_EXPIRATION
:
388 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
392 #ifdef CONFIG_NF_CONNTRACK_LABELS
394 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
396 len
= NF_CT_LABELS_MAX_SIZE
;
400 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
402 len
= NF_CT_HELPER_NAME_LEN
;
405 case NFT_CT_L3PROTOCOL
:
406 case NFT_CT_PROTOCOL
:
		/* For compatibility, do not report error if NFTA_CT_DIRECTION
		 * attribute is specified.
		 */
414 if (tb
[NFTA_CT_DIRECTION
] == NULL
)
417 switch (ctx
->afi
->family
) {
419 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
,
424 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
,
428 return -EAFNOSUPPORT
;
431 case NFT_CT_PROTO_SRC
:
432 case NFT_CT_PROTO_DST
:
433 if (tb
[NFTA_CT_DIRECTION
] == NULL
)
435 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
, src
.u
.all
);
442 #ifdef CONFIG_NF_CONNTRACK_ZONES
451 if (tb
[NFTA_CT_DIRECTION
] != NULL
) {
452 priv
->dir
= nla_get_u8(tb
[NFTA_CT_DIRECTION
]);
454 case IP_CT_DIR_ORIGINAL
:
455 case IP_CT_DIR_REPLY
:
462 priv
->dreg
= nft_parse_register(tb
[NFTA_CT_DREG
]);
463 err
= nft_validate_register_store(ctx
, priv
->dreg
, NULL
,
464 NFT_DATA_VALUE
, len
);
468 err
= nft_ct_netns_get(ctx
->net
, ctx
->afi
->family
);
472 if (priv
->key
== NFT_CT_BYTES
||
473 priv
->key
== NFT_CT_PKTS
||
474 priv
->key
== NFT_CT_AVGPKT
)
475 nf_ct_set_acct(ctx
->net
, true);
480 static void __nft_ct_set_destroy(const struct nft_ctx
*ctx
, struct nft_ct
*priv
)
483 #ifdef CONFIG_NF_CONNTRACK_LABELS
485 nf_connlabels_put(ctx
->net
);
488 #ifdef CONFIG_NF_CONNTRACK_ZONES
490 if (--nft_ct_pcpu_template_refcnt
== 0)
491 nft_ct_tmpl_put_pcpu();
498 static int nft_ct_set_init(const struct nft_ctx
*ctx
,
499 const struct nft_expr
*expr
,
500 const struct nlattr
* const tb
[])
502 struct nft_ct
*priv
= nft_expr_priv(expr
);
506 priv
->dir
= IP_CT_DIR_MAX
;
507 priv
->key
= ntohl(nla_get_be32(tb
[NFTA_CT_KEY
]));
509 #ifdef CONFIG_NF_CONNTRACK_MARK
511 if (tb
[NFTA_CT_DIRECTION
])
513 len
= FIELD_SIZEOF(struct nf_conn
, mark
);
516 #ifdef CONFIG_NF_CONNTRACK_LABELS
518 if (tb
[NFTA_CT_DIRECTION
])
520 len
= NF_CT_LABELS_MAX_SIZE
;
521 err
= nf_connlabels_get(ctx
->net
, (len
* BITS_PER_BYTE
) - 1);
526 #ifdef CONFIG_NF_CONNTRACK_ZONES
528 if (!nft_ct_tmpl_alloc_pcpu())
530 nft_ct_pcpu_template_refcnt
++;
537 if (tb
[NFTA_CT_DIRECTION
]) {
538 priv
->dir
= nla_get_u8(tb
[NFTA_CT_DIRECTION
]);
540 case IP_CT_DIR_ORIGINAL
:
541 case IP_CT_DIR_REPLY
:
548 priv
->sreg
= nft_parse_register(tb
[NFTA_CT_SREG
]);
549 err
= nft_validate_register_load(priv
->sreg
, len
);
553 err
= nft_ct_netns_get(ctx
->net
, ctx
->afi
->family
);
560 __nft_ct_set_destroy(ctx
, priv
);
564 static void nft_ct_get_destroy(const struct nft_ctx
*ctx
,
565 const struct nft_expr
*expr
)
567 nf_ct_netns_put(ctx
->net
, ctx
->afi
->family
);
570 static void nft_ct_set_destroy(const struct nft_ctx
*ctx
,
571 const struct nft_expr
*expr
)
573 struct nft_ct
*priv
= nft_expr_priv(expr
);
575 __nft_ct_set_destroy(ctx
, priv
);
576 nft_ct_netns_put(ctx
->net
, ctx
->afi
->family
);
579 static int nft_ct_get_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
581 const struct nft_ct
*priv
= nft_expr_priv(expr
);
583 if (nft_dump_register(skb
, NFTA_CT_DREG
, priv
->dreg
))
584 goto nla_put_failure
;
585 if (nla_put_be32(skb
, NFTA_CT_KEY
, htonl(priv
->key
)))
586 goto nla_put_failure
;
591 case NFT_CT_PROTO_SRC
:
592 case NFT_CT_PROTO_DST
:
593 if (nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
594 goto nla_put_failure
;
600 if (priv
->dir
< IP_CT_DIR_MAX
&&
601 nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
602 goto nla_put_failure
;
614 static int nft_ct_set_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
616 const struct nft_ct
*priv
= nft_expr_priv(expr
);
618 if (nft_dump_register(skb
, NFTA_CT_SREG
, priv
->sreg
))
619 goto nla_put_failure
;
620 if (nla_put_be32(skb
, NFTA_CT_KEY
, htonl(priv
->key
)))
621 goto nla_put_failure
;
625 if (priv
->dir
< IP_CT_DIR_MAX
&&
626 nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
627 goto nla_put_failure
;
639 static struct nft_expr_type nft_ct_type
;
640 static const struct nft_expr_ops nft_ct_get_ops
= {
641 .type
= &nft_ct_type
,
642 .size
= NFT_EXPR_SIZE(sizeof(struct nft_ct
)),
643 .eval
= nft_ct_get_eval
,
644 .init
= nft_ct_get_init
,
645 .destroy
= nft_ct_get_destroy
,
646 .dump
= nft_ct_get_dump
,
649 static const struct nft_expr_ops nft_ct_set_ops
= {
650 .type
= &nft_ct_type
,
651 .size
= NFT_EXPR_SIZE(sizeof(struct nft_ct
)),
652 .eval
= nft_ct_set_eval
,
653 .init
= nft_ct_set_init
,
654 .destroy
= nft_ct_set_destroy
,
655 .dump
= nft_ct_set_dump
,
#ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Operations of the "ct set" form when the key is the zone: same init,
 * destroy and dump callbacks as nft_ct_set_ops, but a dedicated eval.
 */
static const struct nft_expr_ops nft_ct_set_zone_ops = {
	.type		= &nft_ct_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
	.eval		= nft_ct_set_zone_eval,
	.init		= nft_ct_set_init,
	.destroy	= nft_ct_set_destroy,
	.dump		= nft_ct_set_dump,
};
#endif
669 static const struct nft_expr_ops
*
670 nft_ct_select_ops(const struct nft_ctx
*ctx
,
671 const struct nlattr
* const tb
[])
673 if (tb
[NFTA_CT_KEY
] == NULL
)
674 return ERR_PTR(-EINVAL
);
676 if (tb
[NFTA_CT_DREG
] && tb
[NFTA_CT_SREG
])
677 return ERR_PTR(-EINVAL
);
679 if (tb
[NFTA_CT_DREG
])
680 return &nft_ct_get_ops
;
682 if (tb
[NFTA_CT_SREG
]) {
683 #ifdef CONFIG_NF_CONNTRACK_ZONES
684 if (nla_get_be32(tb
[NFTA_CT_KEY
]) == htonl(NFT_CT_ZONE
))
685 return &nft_ct_set_zone_ops
;
687 return &nft_ct_set_ops
;
690 return ERR_PTR(-EINVAL
);
693 static struct nft_expr_type nft_ct_type __read_mostly
= {
695 .select_ops
= &nft_ct_select_ops
,
696 .policy
= nft_ct_policy
,
697 .maxattr
= NFTA_CT_MAX
,
698 .owner
= THIS_MODULE
,
701 static void nft_notrack_eval(const struct nft_expr
*expr
,
702 struct nft_regs
*regs
,
703 const struct nft_pktinfo
*pkt
)
705 struct sk_buff
*skb
= pkt
->skb
;
706 enum ip_conntrack_info ctinfo
;
709 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
710 /* Previously seen (loopback or untracked)? Ignore. */
714 ct
= nf_ct_untracked_get();
715 atomic_inc(&ct
->ct_general
.use
);
716 nf_ct_set(skb
, ct
, IP_CT_NEW
);
719 static struct nft_expr_type nft_notrack_type
;
720 static const struct nft_expr_ops nft_notrack_ops
= {
721 .type
= &nft_notrack_type
,
722 .size
= NFT_EXPR_SIZE(0),
723 .eval
= nft_notrack_eval
,
726 static struct nft_expr_type nft_notrack_type __read_mostly
= {
728 .ops
= &nft_notrack_ops
,
729 .owner
= THIS_MODULE
,
732 static int __init
nft_ct_module_init(void)
736 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE
> NFT_REG_SIZE
);
738 err
= nft_register_expr(&nft_ct_type
);
742 err
= nft_register_expr(&nft_notrack_type
);
748 nft_unregister_expr(&nft_ct_type
);
752 static void __exit
nft_ct_module_exit(void)
754 nft_unregister_expr(&nft_notrack_type
);
755 nft_unregister_expr(&nft_ct_type
);
758 module_init(nft_ct_module_init
);
759 module_exit(nft_ct_module_exit
);
761 MODULE_LICENSE("GPL");
762 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
763 MODULE_ALIAS_NFT_EXPR("ct");
764 MODULE_ALIAS_NFT_EXPR("notrack");